Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"A quiet cycle after the larger 5.8 effort. Substantially cleanup and
driver work with a few smaller features this time.

- Driver updates for hfi1, rxe, mlx5, hns, qedr, usnic, bnxt_re

- Removal of dead or redundant code across the drivers

- RAW resource tracker dumps to include a device-specific data blob
for device objects to aid device debugging

- Further advance the IOCTL interface, remove the ability to turn it
off. Add QUERY_CONTEXT, QUERY_MR, and QUERY_PD commands

- Remove stubs related to devices with no pkey table

- A shared CQ scheme to allow multiple ULPs to share the CQ rings of
a device to give higher performance

- Several more static checker, syzkaller and rare crashers fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (121 commits)
RDMA/mlx5: Fix flow destination setting for RDMA TX flow table
RDMA/rxe: Remove pkey table
RDMA/umem: Add a schedule point in ib_umem_get()
RDMA/hns: Fix the unneeded process when getting a general type of CQE error
RDMA/hns: Fix error during modify qp RTS2RTS
RDMA/hns: Delete unnecessary memset when allocating VF resource
RDMA/hns: Remove redundant parameters in set_rc_wqe()
RDMA/hns: Remove support for HIP08_A
RDMA/hns: Refactor hns_roce_v2_set_hem()
RDMA/hns: Remove redundant hardware opcode definitions
RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP
RDMA/include: Replace license text with SPDX tags
RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq
RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
RDMA/cma: Execute rdma_cm destruction from a handler properly
RDMA/cma: Remove unneeded locking for req paths
RDMA/cma: Using the standard locking pattern when delivering the removal event
RDMA/cma: Simplify DEVICE_REMOVAL for internal_id
RDMA/efa: Add EFA 0xefa1 PCI ID
RDMA/efa: User/kernel compatibility handshake mechanism
...

+6385 -7009
+1
MAINTAINERS
··· 3621 3621 M: Devesh Sharma <devesh.sharma@broadcom.com> 3622 3622 M: Somnath Kotur <somnath.kotur@broadcom.com> 3623 3623 M: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> 3624 + M: Naresh Kumar PBS <nareshkumar.pbs@broadcom.com> 3624 3625 L: linux-rdma@vger.kernel.org 3625 3626 S: Supported 3626 3627 W: http://www.broadcom.com
-8
drivers/infiniband/Kconfig
··· 37 37 libibverbs, libibcm and a hardware driver library from 38 38 rdma-core <https://github.com/linux-rdma/rdma-core>. 39 39 40 - config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI 41 - bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)" 42 - depends on INFINIBAND_USER_ACCESS 43 - help 44 - IOCTL based uAPI support for Infiniband is enabled by default for 45 - new verbs only. This allows userspace to invoke the IOCTL based uAPI 46 - for current legacy verbs too. 47 - 48 40 config INFINIBAND_USER_MEM 49 41 bool 50 42 depends on INFINIBAND_USER_ACCESS != n
+30 -17
drivers/infiniband/core/cache.c
··· 1054 1054 1055 1055 cache = device->port_data[port_num].cache.pkey; 1056 1056 1057 - if (index < 0 || index >= cache->table_len) 1057 + if (!cache || index < 0 || index >= cache->table_len) 1058 1058 ret = -EINVAL; 1059 1059 else 1060 1060 *pkey = cache->table[index]; ··· 1099 1099 read_lock_irqsave(&device->cache_lock, flags); 1100 1100 1101 1101 cache = device->port_data[port_num].cache.pkey; 1102 + if (!cache) { 1103 + ret = -EINVAL; 1104 + goto err; 1105 + } 1102 1106 1103 1107 *index = -1; 1104 1108 ··· 1121 1117 ret = 0; 1122 1118 } 1123 1119 1120 + err: 1124 1121 read_unlock_irqrestore(&device->cache_lock, flags); 1125 1122 1126 1123 return ret; ··· 1144 1139 read_lock_irqsave(&device->cache_lock, flags); 1145 1140 1146 1141 cache = device->port_data[port_num].cache.pkey; 1142 + if (!cache) { 1143 + ret = -EINVAL; 1144 + goto err; 1145 + } 1147 1146 1148 1147 *index = -1; 1149 1148 ··· 1158 1149 break; 1159 1150 } 1160 1151 1152 + err: 1161 1153 read_unlock_irqrestore(&device->cache_lock, flags); 1162 1154 1163 1155 return ret; ··· 1435 1425 goto err; 1436 1426 } 1437 1427 1438 - pkey_cache = kmalloc(struct_size(pkey_cache, table, 1439 - tprops->pkey_tbl_len), 1440 - GFP_KERNEL); 1441 - if (!pkey_cache) { 1442 - ret = -ENOMEM; 1443 - goto err; 1444 - } 1445 - 1446 - pkey_cache->table_len = tprops->pkey_tbl_len; 1447 - 1448 - for (i = 0; i < pkey_cache->table_len; ++i) { 1449 - ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1450 - if (ret) { 1451 - dev_warn(&device->dev, 1452 - "ib_query_pkey failed (%d) for index %d\n", 1453 - ret, i); 1428 + if (tprops->pkey_tbl_len) { 1429 + pkey_cache = kmalloc(struct_size(pkey_cache, table, 1430 + tprops->pkey_tbl_len), 1431 + GFP_KERNEL); 1432 + if (!pkey_cache) { 1433 + ret = -ENOMEM; 1454 1434 goto err; 1435 + } 1436 + 1437 + pkey_cache->table_len = tprops->pkey_tbl_len; 1438 + 1439 + for (i = 0; i < pkey_cache->table_len; ++i) { 1440 + ret = ib_query_pkey(device, port, i, 1441 + pkey_cache->table + 
i); 1442 + if (ret) { 1443 + dev_warn(&device->dev, 1444 + "ib_query_pkey failed (%d) for index %d\n", 1445 + ret, i); 1446 + goto err; 1447 + } 1455 1448 } 1456 1449 } 1457 1450
+123 -134
drivers/infiniband/core/cma.c
··· 428 428 return ret; 429 429 } 430 430 431 - static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, 432 - enum rdma_cm_state exch) 433 - { 434 - unsigned long flags; 435 - enum rdma_cm_state old; 436 - 437 - spin_lock_irqsave(&id_priv->lock, flags); 438 - old = id_priv->state; 439 - id_priv->state = exch; 440 - spin_unlock_irqrestore(&id_priv->lock, flags); 441 - return old; 442 - } 443 - 444 431 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 445 432 { 446 433 return hdr->ip_version >> 4; ··· 1816 1829 } 1817 1830 } 1818 1831 1819 - void rdma_destroy_id(struct rdma_cm_id *id) 1832 + static void _destroy_id(struct rdma_id_private *id_priv, 1833 + enum rdma_cm_state state) 1820 1834 { 1821 - struct rdma_id_private *id_priv; 1822 - enum rdma_cm_state state; 1823 - 1824 - id_priv = container_of(id, struct rdma_id_private, id); 1825 - trace_cm_id_destroy(id_priv); 1826 - state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1827 1835 cma_cancel_operation(id_priv, state); 1828 - 1829 - /* 1830 - * Wait for any active callback to finish. New callbacks will find 1831 - * the id_priv state set to destroying and abort. 1832 - */ 1833 - mutex_lock(&id_priv->handler_mutex); 1834 - mutex_unlock(&id_priv->handler_mutex); 1835 1836 1836 1837 rdma_restrack_del(&id_priv->res); 1837 1838 if (id_priv->cma_dev) { ··· 1848 1873 1849 1874 put_net(id_priv->id.route.addr.dev_addr.net); 1850 1875 kfree(id_priv); 1876 + } 1877 + 1878 + /* 1879 + * destroy an ID from within the handler_mutex. This ensures that no other 1880 + * handlers can start running concurrently. 1881 + */ 1882 + static void destroy_id_handler_unlock(struct rdma_id_private *id_priv) 1883 + __releases(&idprv->handler_mutex) 1884 + { 1885 + enum rdma_cm_state state; 1886 + unsigned long flags; 1887 + 1888 + trace_cm_id_destroy(id_priv); 1889 + 1890 + /* 1891 + * Setting the state to destroyed under the handler mutex provides a 1892 + * fence against calling handler callbacks. 
If this is invoked due to 1893 + * the failure of a handler callback then it guarentees that no future 1894 + * handlers will be called. 1895 + */ 1896 + lockdep_assert_held(&id_priv->handler_mutex); 1897 + spin_lock_irqsave(&id_priv->lock, flags); 1898 + state = id_priv->state; 1899 + id_priv->state = RDMA_CM_DESTROYING; 1900 + spin_unlock_irqrestore(&id_priv->lock, flags); 1901 + mutex_unlock(&id_priv->handler_mutex); 1902 + _destroy_id(id_priv, state); 1903 + } 1904 + 1905 + void rdma_destroy_id(struct rdma_cm_id *id) 1906 + { 1907 + struct rdma_id_private *id_priv = 1908 + container_of(id, struct rdma_id_private, id); 1909 + 1910 + mutex_lock(&id_priv->handler_mutex); 1911 + destroy_id_handler_unlock(id_priv); 1851 1912 } 1852 1913 EXPORT_SYMBOL(rdma_destroy_id); 1853 1914 ··· 1936 1925 { 1937 1926 int ret; 1938 1927 1928 + lockdep_assert_held(&id_priv->handler_mutex); 1929 + 1939 1930 trace_cm_event_handler(id_priv, event); 1940 1931 ret = id_priv->id.event_handler(&id_priv->id, event); 1941 1932 trace_cm_event_done(id_priv, event, ret); ··· 1949 1936 { 1950 1937 struct rdma_id_private *id_priv = cm_id->context; 1951 1938 struct rdma_cm_event event = {}; 1952 - int ret = 0; 1939 + int ret; 1953 1940 1954 1941 mutex_lock(&id_priv->handler_mutex); 1955 1942 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && ··· 2018 2005 if (ret) { 2019 2006 /* Destroy the CM ID by returning a non-zero value. 
*/ 2020 2007 id_priv->cm_id.ib = NULL; 2021 - cma_exch(id_priv, RDMA_CM_DESTROYING); 2022 - mutex_unlock(&id_priv->handler_mutex); 2023 - rdma_destroy_id(&id_priv->id); 2008 + destroy_id_handler_unlock(id_priv); 2024 2009 return ret; 2025 2010 } 2026 2011 out: 2027 2012 mutex_unlock(&id_priv->handler_mutex); 2028 - return ret; 2013 + return 0; 2029 2014 } 2030 2015 2031 2016 static struct rdma_id_private * ··· 2185 2174 mutex_lock(&listen_id->handler_mutex); 2186 2175 if (listen_id->state != RDMA_CM_LISTEN) { 2187 2176 ret = -ECONNABORTED; 2188 - goto err1; 2177 + goto err_unlock; 2189 2178 } 2190 2179 2191 2180 offset = cma_user_data_offset(listen_id); ··· 2202 2191 } 2203 2192 if (!conn_id) { 2204 2193 ret = -ENOMEM; 2205 - goto err1; 2194 + goto err_unlock; 2206 2195 } 2207 2196 2208 2197 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2209 2198 ret = cma_ib_acquire_dev(conn_id, listen_id, &req); 2210 - if (ret) 2211 - goto err2; 2199 + if (ret) { 2200 + destroy_id_handler_unlock(conn_id); 2201 + goto err_unlock; 2202 + } 2212 2203 2213 2204 conn_id->cm_id.ib = cm_id; 2214 2205 cm_id->context = conn_id; 2215 2206 cm_id->cm_handler = cma_ib_handler; 2216 2207 2217 - /* 2218 - * Protect against the user destroying conn_id from another thread 2219 - * until we're done accessing it. 2220 - */ 2221 - cma_id_get(conn_id); 2222 2208 ret = cma_cm_event_handler(conn_id, &event); 2223 - if (ret) 2224 - goto err3; 2225 - /* 2226 - * Acquire mutex to prevent user executing rdma_destroy_id() 2227 - * while we're accessing the cm_id. 2228 - */ 2229 - mutex_lock(&lock); 2209 + if (ret) { 2210 + /* Destroy the CM ID by returning a non-zero value. 
*/ 2211 + conn_id->cm_id.ib = NULL; 2212 + mutex_unlock(&listen_id->handler_mutex); 2213 + destroy_id_handler_unlock(conn_id); 2214 + goto net_dev_put; 2215 + } 2216 + 2230 2217 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2231 2218 (conn_id->id.qp_type != IB_QPT_UD)) { 2232 2219 trace_cm_send_mra(cm_id->context); 2233 2220 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2234 2221 } 2235 - mutex_unlock(&lock); 2236 2222 mutex_unlock(&conn_id->handler_mutex); 2237 - mutex_unlock(&listen_id->handler_mutex); 2238 - cma_id_put(conn_id); 2239 - if (net_dev) 2240 - dev_put(net_dev); 2241 - return 0; 2242 2223 2243 - err3: 2244 - cma_id_put(conn_id); 2245 - /* Destroy the CM ID by returning a non-zero value. */ 2246 - conn_id->cm_id.ib = NULL; 2247 - err2: 2248 - cma_exch(conn_id, RDMA_CM_DESTROYING); 2249 - mutex_unlock(&conn_id->handler_mutex); 2250 - err1: 2224 + err_unlock: 2251 2225 mutex_unlock(&listen_id->handler_mutex); 2252 - if (conn_id) 2253 - rdma_destroy_id(&conn_id->id); 2254 2226 2255 2227 net_dev_put: 2256 2228 if (net_dev) ··· 2333 2339 if (ret) { 2334 2340 /* Destroy the CM ID by returning a non-zero value. 
*/ 2335 2341 id_priv->cm_id.iw = NULL; 2336 - cma_exch(id_priv, RDMA_CM_DESTROYING); 2337 - mutex_unlock(&id_priv->handler_mutex); 2338 - rdma_destroy_id(&id_priv->id); 2342 + destroy_id_handler_unlock(id_priv); 2339 2343 return ret; 2340 2344 } 2341 2345 ··· 2380 2388 2381 2389 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2382 2390 if (ret) { 2383 - mutex_unlock(&conn_id->handler_mutex); 2384 - rdma_destroy_id(new_cm_id); 2385 - goto out; 2391 + mutex_unlock(&listen_id->handler_mutex); 2392 + destroy_id_handler_unlock(conn_id); 2393 + return ret; 2386 2394 } 2387 2395 2388 2396 ret = cma_iw_acquire_dev(conn_id, listen_id); 2389 2397 if (ret) { 2390 - mutex_unlock(&conn_id->handler_mutex); 2391 - rdma_destroy_id(new_cm_id); 2392 - goto out; 2398 + mutex_unlock(&listen_id->handler_mutex); 2399 + destroy_id_handler_unlock(conn_id); 2400 + return ret; 2393 2401 } 2394 2402 2395 2403 conn_id->cm_id.iw = cm_id; ··· 2399 2407 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2400 2408 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2401 2409 2402 - /* 2403 - * Protect against the user destroying conn_id from another thread 2404 - * until we're done accessing it. 
2405 - */ 2406 - cma_id_get(conn_id); 2407 2410 ret = cma_cm_event_handler(conn_id, &event); 2408 2411 if (ret) { 2409 2412 /* User wants to destroy the CM ID */ 2410 2413 conn_id->cm_id.iw = NULL; 2411 - cma_exch(conn_id, RDMA_CM_DESTROYING); 2412 - mutex_unlock(&conn_id->handler_mutex); 2413 2414 mutex_unlock(&listen_id->handler_mutex); 2414 - cma_id_put(conn_id); 2415 - rdma_destroy_id(&conn_id->id); 2415 + destroy_id_handler_unlock(conn_id); 2416 2416 return ret; 2417 2417 } 2418 2418 2419 2419 mutex_unlock(&conn_id->handler_mutex); 2420 - cma_id_put(conn_id); 2421 2420 2422 2421 out: 2423 2422 mutex_unlock(&listen_id->handler_mutex); ··· 2464 2481 struct rdma_cm_event *event) 2465 2482 { 2466 2483 struct rdma_id_private *id_priv = id->context; 2484 + 2485 + /* Listening IDs are always destroyed on removal */ 2486 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 2487 + return -1; 2467 2488 2468 2489 id->context = id_priv->id.context; 2469 2490 id->event_handler = id_priv->id.event_handler; ··· 2644 2657 { 2645 2658 struct cma_work *work = container_of(_work, struct cma_work, work); 2646 2659 struct rdma_id_private *id_priv = work->id; 2647 - int destroy = 0; 2648 2660 2649 2661 mutex_lock(&id_priv->handler_mutex); 2650 2662 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2651 - goto out; 2663 + goto out_unlock; 2652 2664 2653 2665 if (cma_cm_event_handler(id_priv, &work->event)) { 2654 - cma_exch(id_priv, RDMA_CM_DESTROYING); 2655 - destroy = 1; 2666 + cma_id_put(id_priv); 2667 + destroy_id_handler_unlock(id_priv); 2668 + goto out_free; 2656 2669 } 2657 - out: 2670 + 2671 + out_unlock: 2658 2672 mutex_unlock(&id_priv->handler_mutex); 2659 2673 cma_id_put(id_priv); 2660 - if (destroy) 2661 - rdma_destroy_id(&id_priv->id); 2674 + out_free: 2662 2675 kfree(work); 2663 2676 } 2664 2677 ··· 2666 2679 { 2667 2680 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 2668 2681 struct rdma_id_private *id_priv = work->id; 2669 - 
int destroy = 0; 2670 2682 2671 2683 mutex_lock(&id_priv->handler_mutex); 2672 2684 if (id_priv->state == RDMA_CM_DESTROYING || 2673 2685 id_priv->state == RDMA_CM_DEVICE_REMOVAL) 2674 - goto out; 2686 + goto out_unlock; 2675 2687 2676 2688 if (cma_cm_event_handler(id_priv, &work->event)) { 2677 - cma_exch(id_priv, RDMA_CM_DESTROYING); 2678 - destroy = 1; 2689 + cma_id_put(id_priv); 2690 + destroy_id_handler_unlock(id_priv); 2691 + goto out_free; 2679 2692 } 2680 2693 2681 - out: 2694 + out_unlock: 2682 2695 mutex_unlock(&id_priv->handler_mutex); 2683 2696 cma_id_put(id_priv); 2684 - if (destroy) 2685 - rdma_destroy_id(&id_priv->id); 2697 + out_free: 2686 2698 kfree(work); 2687 2699 } 2688 2700 ··· 3157 3171 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 3158 3172 3159 3173 if (cma_cm_event_handler(id_priv, &event)) { 3160 - cma_exch(id_priv, RDMA_CM_DESTROYING); 3161 - mutex_unlock(&id_priv->handler_mutex); 3162 - rdma_destroy_id(&id_priv->id); 3174 + destroy_id_handler_unlock(id_priv); 3163 3175 return; 3164 3176 } 3165 3177 out: ··· 3774 3790 struct rdma_cm_event event = {}; 3775 3791 const struct ib_cm_sidr_rep_event_param *rep = 3776 3792 &ib_event->param.sidr_rep_rcvd; 3777 - int ret = 0; 3793 + int ret; 3778 3794 3779 3795 mutex_lock(&id_priv->handler_mutex); 3780 3796 if (id_priv->state != RDMA_CM_CONNECT) ··· 3824 3840 if (ret) { 3825 3841 /* Destroy the CM ID by returning a non-zero value. 
*/ 3826 3842 id_priv->cm_id.ib = NULL; 3827 - cma_exch(id_priv, RDMA_CM_DESTROYING); 3828 - mutex_unlock(&id_priv->handler_mutex); 3829 - rdma_destroy_id(&id_priv->id); 3843 + destroy_id_handler_unlock(id_priv); 3830 3844 return ret; 3831 3845 } 3832 3846 out: 3833 3847 mutex_unlock(&id_priv->handler_mutex); 3834 - return ret; 3848 + return 0; 3835 3849 } 3836 3850 3837 3851 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, ··· 4354 4372 4355 4373 rdma_destroy_ah_attr(&event.param.ud.ah_attr); 4356 4374 if (ret) { 4357 - cma_exch(id_priv, RDMA_CM_DESTROYING); 4358 - mutex_unlock(&id_priv->handler_mutex); 4359 - rdma_destroy_id(&id_priv->id); 4375 + destroy_id_handler_unlock(id_priv); 4360 4376 return 0; 4361 4377 } 4362 4378 ··· 4769 4789 return ret; 4770 4790 } 4771 4791 4772 - static int cma_remove_id_dev(struct rdma_id_private *id_priv) 4792 + static void cma_send_device_removal_put(struct rdma_id_private *id_priv) 4773 4793 { 4774 - struct rdma_cm_event event = {}; 4794 + struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; 4775 4795 enum rdma_cm_state state; 4776 - int ret = 0; 4796 + unsigned long flags; 4777 4797 4778 - /* Record that we want to remove the device */ 4779 - state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); 4780 - if (state == RDMA_CM_DESTROYING) 4781 - return 0; 4782 - 4783 - cma_cancel_operation(id_priv, state); 4784 4798 mutex_lock(&id_priv->handler_mutex); 4799 + /* Record that we want to remove the device */ 4800 + spin_lock_irqsave(&id_priv->lock, flags); 4801 + state = id_priv->state; 4802 + if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) { 4803 + spin_unlock_irqrestore(&id_priv->lock, flags); 4804 + mutex_unlock(&id_priv->handler_mutex); 4805 + cma_id_put(id_priv); 4806 + return; 4807 + } 4808 + id_priv->state = RDMA_CM_DEVICE_REMOVAL; 4809 + spin_unlock_irqrestore(&id_priv->lock, flags); 4785 4810 4786 - /* Check for destruction from another callback. 
*/ 4787 - if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) 4788 - goto out; 4789 - 4790 - event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; 4791 - ret = cma_cm_event_handler(id_priv, &event); 4792 - out: 4811 + if (cma_cm_event_handler(id_priv, &event)) { 4812 + /* 4813 + * At this point the ULP promises it won't call 4814 + * rdma_destroy_id() concurrently 4815 + */ 4816 + cma_id_put(id_priv); 4817 + mutex_unlock(&id_priv->handler_mutex); 4818 + trace_cm_id_destroy(id_priv); 4819 + _destroy_id(id_priv, state); 4820 + return; 4821 + } 4793 4822 mutex_unlock(&id_priv->handler_mutex); 4794 - return ret; 4823 + 4824 + /* 4825 + * If this races with destroy then the thread that first assigns state 4826 + * to a destroying does the cancel. 4827 + */ 4828 + cma_cancel_operation(id_priv, state); 4829 + cma_id_put(id_priv); 4795 4830 } 4796 4831 4797 4832 static void cma_process_remove(struct cma_device *cma_dev) 4798 4833 { 4799 - struct rdma_id_private *id_priv; 4800 - int ret; 4801 - 4802 4834 mutex_lock(&lock); 4803 4835 while (!list_empty(&cma_dev->id_list)) { 4804 - id_priv = list_entry(cma_dev->id_list.next, 4805 - struct rdma_id_private, list); 4836 + struct rdma_id_private *id_priv = list_first_entry( 4837 + &cma_dev->id_list, struct rdma_id_private, list); 4806 4838 4807 4839 list_del(&id_priv->listen_list); 4808 4840 list_del_init(&id_priv->list); 4809 4841 cma_id_get(id_priv); 4810 4842 mutex_unlock(&lock); 4811 4843 4812 - ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); 4813 - cma_id_put(id_priv); 4814 - if (ret) 4815 - rdma_destroy_id(&id_priv->id); 4844 + cma_send_device_removal_put(id_priv); 4816 4845 4817 4846 mutex_lock(&lock); 4818 4847 }
+7 -17
drivers/infiniband/core/counters.c
··· 8 8 #include "core_priv.h" 9 9 #include "restrack.h" 10 10 11 - #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) 11 + #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) 12 12 13 13 static int __counter_set_mode(struct rdma_counter_mode *curr, 14 14 enum rdma_nl_counter_mode new_mode, ··· 149 149 struct auto_mode_param *param = &counter->mode.param; 150 150 bool match = true; 151 151 152 - /* 153 - * Ensure that counter belongs to the right PID. This operation can 154 - * race with user space which kills the process and leaves QP and 155 - * counters orphans. 156 - * 157 - * It is not a big deal because exitted task will leave both QP and 158 - * counter in the same bucket of zombie process. Just ensure that 159 - * process is still alive before procedding. 160 - * 161 - */ 162 - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || 163 - !task_pid_nr(qp->res.task)) 164 - return false; 165 - 166 152 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 167 153 match &= (param->qp_type == qp->qp_type); 154 + 155 + if (auto_mask & RDMA_COUNTER_MASK_PID) 156 + match &= (task_pid_nr(counter->res.task) == 157 + task_pid_nr(qp->res.task)); 168 158 169 159 return match; 170 160 } ··· 278 288 struct rdma_counter *counter; 279 289 int ret; 280 290 281 - if (!qp->res.valid) 291 + if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) 282 292 return 0; 283 293 284 294 if (!rdma_is_port_valid(dev, port)) ··· 473 483 goto err; 474 484 } 475 485 476 - if (counter->res.task != qp->res.task) { 486 + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { 477 487 ret = -EINVAL; 478 488 goto err_task; 479 489 }
+22 -6
drivers/infiniband/core/device.c
··· 272 272 } mandatory_table[] = { 273 273 IB_MANDATORY_FUNC(query_device), 274 274 IB_MANDATORY_FUNC(query_port), 275 - IB_MANDATORY_FUNC(query_pkey), 276 275 IB_MANDATORY_FUNC(alloc_pd), 277 276 IB_MANDATORY_FUNC(dealloc_pd), 278 277 IB_MANDATORY_FUNC(create_qp), ··· 1342 1343 return ret; 1343 1344 } 1344 1345 1346 + static void prevent_dealloc_device(struct ib_device *ib_dev) 1347 + { 1348 + } 1349 + 1345 1350 /** 1346 1351 * ib_register_device - Register an IB device with IB core 1347 1352 * @device: Device to register ··· 1416 1413 * possibility for a parallel unregistration along with this 1417 1414 * error flow. Since we have a refcount here we know any 1418 1415 * parallel flow is stopped in disable_device and will see the 1419 - * NULL pointers, causing the responsibility to 1416 + * special dealloc_driver pointer, causing the responsibility to 1420 1417 * ib_dealloc_device() to revert back to this thread. 1421 1418 */ 1422 1419 dealloc_fn = device->ops.dealloc_driver; 1423 - device->ops.dealloc_driver = NULL; 1420 + device->ops.dealloc_driver = prevent_dealloc_device; 1424 1421 ib_device_put(device); 1425 1422 __ib_unregister_device(device); 1426 1423 device->ops.dealloc_driver = dealloc_fn; ··· 1469 1466 * Drivers using the new flow may not call ib_dealloc_device except 1470 1467 * in error unwind prior to registration success. 
1471 1468 */ 1472 - if (ib_dev->ops.dealloc_driver) { 1469 + if (ib_dev->ops.dealloc_driver && 1470 + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { 1473 1471 WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); 1474 1472 ib_dealloc_device(ib_dev); 1475 1473 } ··· 2365 2361 if (!rdma_is_port_valid(device, port_num)) 2366 2362 return -EINVAL; 2367 2363 2364 + if (!device->ops.query_pkey) 2365 + return -EOPNOTSUPP; 2366 + 2368 2367 return device->ops.query_pkey(device, port_num, index, pkey); 2369 2368 } 2370 2369 EXPORT_SYMBOL(ib_query_pkey); ··· 2628 2621 SET_DEVICE_OP(dev_ops, drain_rq); 2629 2622 SET_DEVICE_OP(dev_ops, drain_sq); 2630 2623 SET_DEVICE_OP(dev_ops, enable_driver); 2631 - SET_DEVICE_OP(dev_ops, fill_res_entry); 2632 - SET_DEVICE_OP(dev_ops, fill_stat_entry); 2624 + SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); 2625 + SET_DEVICE_OP(dev_ops, fill_res_cq_entry); 2626 + SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw); 2627 + SET_DEVICE_OP(dev_ops, fill_res_mr_entry); 2628 + SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw); 2629 + SET_DEVICE_OP(dev_ops, fill_res_qp_entry); 2630 + SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw); 2631 + SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); 2633 2632 SET_DEVICE_OP(dev_ops, get_dev_fw_str); 2634 2633 SET_DEVICE_OP(dev_ops, get_dma_mr); 2635 2634 SET_DEVICE_OP(dev_ops, get_hw_stats); ··· 2680 2667 SET_DEVICE_OP(dev_ops, query_port); 2681 2668 SET_DEVICE_OP(dev_ops, query_qp); 2682 2669 SET_DEVICE_OP(dev_ops, query_srq); 2670 + SET_DEVICE_OP(dev_ops, query_ucontext); 2683 2671 SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); 2684 2672 SET_DEVICE_OP(dev_ops, read_counters); 2685 2673 SET_DEVICE_OP(dev_ops, reg_dm_mr); ··· 2693 2679 SET_DEVICE_OP(dev_ops, set_vf_link_state); 2694 2680 2695 2681 SET_OBJ_SIZE(dev_ops, ib_ah); 2682 + SET_OBJ_SIZE(dev_ops, ib_counters); 2696 2683 SET_OBJ_SIZE(dev_ops, ib_cq); 2697 2684 SET_OBJ_SIZE(dev_ops, ib_pd); 2698 2685 SET_OBJ_SIZE(dev_ops, ib_srq); 2699 2686 SET_OBJ_SIZE(dev_ops, 
ib_ucontext); 2687 + SET_OBJ_SIZE(dev_ops, ib_xrcd); 2700 2688 } 2701 2689 EXPORT_SYMBOL(ib_set_device_ops); 2702 2690
+15 -15
drivers/infiniband/core/mad.c
··· 402 402 INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); 403 403 INIT_LIST_HEAD(&mad_agent_priv->local_list); 404 404 INIT_WORK(&mad_agent_priv->local_work, local_completions); 405 - atomic_set(&mad_agent_priv->refcount, 1); 405 + refcount_set(&mad_agent_priv->refcount, 1); 406 406 init_completion(&mad_agent_priv->comp); 407 407 408 408 ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); ··· 484 484 485 485 static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) 486 486 { 487 - if (atomic_dec_and_test(&mad_agent_priv->refcount)) 487 + if (refcount_dec_and_test(&mad_agent_priv->refcount)) 488 488 complete(&mad_agent_priv->comp); 489 489 } 490 490 ··· 718 718 * Reference MAD agent until receive 719 719 * side of local completion handled 720 720 */ 721 - atomic_inc(&mad_agent_priv->refcount); 721 + refcount_inc(&mad_agent_priv->refcount); 722 722 } else 723 723 kfree(mad_priv); 724 724 break; ··· 758 758 local->return_wc_byte_len = mad_size; 759 759 } 760 760 /* Reference MAD agent until send side of local completion handled */ 761 - atomic_inc(&mad_agent_priv->refcount); 761 + refcount_inc(&mad_agent_priv->refcount); 762 762 /* Queue local completion to local list */ 763 763 spin_lock_irqsave(&mad_agent_priv->lock, flags); 764 764 list_add_tail(&local->completion_list, &mad_agent_priv->local_list); ··· 916 916 } 917 917 918 918 mad_send_wr->send_buf.mad_agent = mad_agent; 919 - atomic_inc(&mad_agent_priv->refcount); 919 + refcount_inc(&mad_agent_priv->refcount); 920 920 return &mad_send_wr->send_buf; 921 921 } 922 922 EXPORT_SYMBOL(ib_create_send_mad); ··· 1131 1131 mad_send_wr->status = IB_WC_SUCCESS; 1132 1132 1133 1133 /* Reference MAD agent until send completes */ 1134 - atomic_inc(&mad_agent_priv->refcount); 1134 + refcount_inc(&mad_agent_priv->refcount); 1135 1135 spin_lock_irqsave(&mad_agent_priv->lock, flags); 1136 1136 list_add_tail(&mad_send_wr->agent_list, 1137 1137 &mad_agent_priv->send_list); ··· 
1148 1148 spin_lock_irqsave(&mad_agent_priv->lock, flags); 1149 1149 list_del(&mad_send_wr->agent_list); 1150 1150 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1151 - atomic_dec(&mad_agent_priv->refcount); 1151 + deref_mad_agent(mad_agent_priv); 1152 1152 goto error; 1153 1153 } 1154 1154 } ··· 1554 1554 hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; 1555 1555 rcu_read_lock(); 1556 1556 mad_agent = xa_load(&ib_mad_clients, hi_tid); 1557 - if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) 1557 + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) 1558 1558 mad_agent = NULL; 1559 1559 rcu_read_unlock(); 1560 1560 } else { ··· 1606 1606 } 1607 1607 } 1608 1608 if (mad_agent) 1609 - atomic_inc(&mad_agent->refcount); 1609 + refcount_inc(&mad_agent->refcount); 1610 1610 out: 1611 1611 spin_unlock_irqrestore(&port_priv->reg_lock, flags); 1612 1612 } ··· 1831 1831 mad_agent_priv->agent.recv_handler( 1832 1832 &mad_agent_priv->agent, NULL, 1833 1833 mad_recv_wc); 1834 - atomic_dec(&mad_agent_priv->refcount); 1834 + deref_mad_agent(mad_agent_priv); 1835 1835 } else { 1836 1836 /* not user rmpp, revert to normal behavior and 1837 1837 * drop the mad */ ··· 1848 1848 &mad_agent_priv->agent, 1849 1849 &mad_send_wr->send_buf, 1850 1850 mad_recv_wc); 1851 - atomic_dec(&mad_agent_priv->refcount); 1851 + deref_mad_agent(mad_agent_priv); 1852 1852 1853 1853 mad_send_wc.status = IB_WC_SUCCESS; 1854 1854 mad_send_wc.vendor_err = 0; ··· 2438 2438 list_del(&mad_send_wr->agent_list); 2439 2439 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2440 2440 &mad_send_wc); 2441 - atomic_dec(&mad_agent_priv->refcount); 2441 + deref_mad_agent(mad_agent_priv); 2442 2442 } 2443 2443 } 2444 2444 ··· 2572 2572 &local->mad_send_wr->send_buf, 2573 2573 &local->mad_priv->header.recv_wc); 2574 2574 spin_lock_irqsave(&recv_mad_agent->lock, flags); 2575 - atomic_dec(&recv_mad_agent->refcount); 2575 + deref_mad_agent(recv_mad_agent); 2576 2576 
spin_unlock_irqrestore(&recv_mad_agent->lock, flags); 2577 2577 } 2578 2578 ··· 2585 2585 &mad_send_wc); 2586 2586 2587 2587 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2588 - atomic_dec(&mad_agent_priv->refcount); 2588 + deref_mad_agent(mad_agent_priv); 2589 2589 if (free_mad) 2590 2590 kfree(local->mad_priv); 2591 2591 kfree(local); ··· 2671 2671 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2672 2672 &mad_send_wc); 2673 2673 2674 - atomic_dec(&mad_agent_priv->refcount); 2674 + deref_mad_agent(mad_agent_priv); 2675 2675 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2676 2676 } 2677 2677 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+1 -1
drivers/infiniband/core/mad_priv.h
··· 103 103 struct work_struct local_work; 104 104 struct list_head rmpp_list; 105 105 106 - atomic_t refcount; 106 + refcount_t refcount; 107 107 union { 108 108 struct completion comp; 109 109 struct rcu_head rcu;
+9 -18
drivers/infiniband/core/mad_rmpp.c
··· 40 40 enum rmpp_state { 41 41 RMPP_STATE_ACTIVE, 42 42 RMPP_STATE_TIMEOUT, 43 - RMPP_STATE_COMPLETE, 44 - RMPP_STATE_CANCELING 43 + RMPP_STATE_COMPLETE 45 44 }; 46 45 47 46 struct mad_rmpp_recv { ··· 51 52 struct completion comp; 52 53 enum rmpp_state state; 53 54 spinlock_t lock; 54 - atomic_t refcount; 55 + refcount_t refcount; 55 56 56 57 struct ib_ah *ah; 57 58 struct ib_mad_recv_wc *rmpp_wc; ··· 72 73 73 74 static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) 74 75 { 75 - if (atomic_dec_and_test(&rmpp_recv->refcount)) 76 + if (refcount_dec_and_test(&rmpp_recv->refcount)) 76 77 complete(&rmpp_recv->comp); 77 78 } 78 79 ··· 91 92 92 93 spin_lock_irqsave(&agent->lock, flags); 93 94 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { 94 - if (rmpp_recv->state != RMPP_STATE_COMPLETE) 95 - ib_free_recv_mad(rmpp_recv->rmpp_wc); 96 - rmpp_recv->state = RMPP_STATE_CANCELING; 97 - } 98 - spin_unlock_irqrestore(&agent->lock, flags); 99 - 100 - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { 101 95 cancel_delayed_work(&rmpp_recv->timeout_work); 102 96 cancel_delayed_work(&rmpp_recv->cleanup_work); 103 97 } 98 + spin_unlock_irqrestore(&agent->lock, flags); 104 99 105 100 flush_workqueue(agent->qp_info->port_priv->wq); 106 101 107 102 list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, 108 103 &agent->rmpp_list, list) { 109 104 list_del(&rmpp_recv->list); 105 + if (rmpp_recv->state != RMPP_STATE_COMPLETE) 106 + ib_free_recv_mad(rmpp_recv->rmpp_wc); 110 107 destroy_rmpp_recv(rmpp_recv); 111 108 } 112 109 } ··· 267 272 unsigned long flags; 268 273 269 274 spin_lock_irqsave(&rmpp_recv->agent->lock, flags); 270 - if (rmpp_recv->state == RMPP_STATE_CANCELING) { 271 - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); 272 - return; 273 - } 274 275 list_del(&rmpp_recv->list); 275 276 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); 276 277 destroy_rmpp_recv(rmpp_recv); ··· 296 305 INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, 
recv_cleanup_handler); 297 306 spin_lock_init(&rmpp_recv->lock); 298 307 rmpp_recv->state = RMPP_STATE_ACTIVE; 299 - atomic_set(&rmpp_recv->refcount, 1); 308 + refcount_set(&rmpp_recv->refcount, 1); 300 309 301 310 rmpp_recv->rmpp_wc = mad_recv_wc; 302 311 rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; ··· 348 357 spin_lock_irqsave(&agent->lock, flags); 349 358 rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); 350 359 if (rmpp_recv) 351 - atomic_inc(&rmpp_recv->refcount); 360 + refcount_inc(&rmpp_recv->refcount); 352 361 spin_unlock_irqrestore(&agent->lock, flags); 353 362 return rmpp_recv; 354 363 } ··· 544 553 destroy_rmpp_recv(rmpp_recv); 545 554 return continue_rmpp(agent, mad_recv_wc); 546 555 } 547 - atomic_inc(&rmpp_recv->refcount); 556 + refcount_inc(&rmpp_recv->refcount); 548 557 549 558 if (get_last_flag(&mad_recv_wc->recv_buf)) { 550 559 rmpp_recv->state = RMPP_STATE_COMPLETE;
+129 -100
drivers/infiniband/core/nldev.c
··· 114 114 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, 115 115 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, 116 116 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, 117 + [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, 117 118 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, 118 119 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, 119 120 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, ··· 447 446 return err ? -EMSGSIZE : 0; 448 447 } 449 448 450 - static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, 451 - struct rdma_restrack_entry *res) 449 + static int fill_res_qp_entry_query(struct sk_buff *msg, 450 + struct rdma_restrack_entry *res, 451 + struct ib_device *dev, 452 + struct ib_qp *qp) 452 453 { 453 - if (!dev->ops.fill_res_entry) 454 - return false; 455 - return dev->ops.fill_res_entry(msg, res); 456 - } 457 - 458 - static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, 459 - struct rdma_restrack_entry *res) 460 - { 461 - if (!dev->ops.fill_stat_entry) 462 - return false; 463 - return dev->ops.fill_stat_entry(msg, res); 464 - } 465 - 466 - static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, 467 - struct rdma_restrack_entry *res, uint32_t port) 468 - { 469 - struct ib_qp *qp = container_of(res, struct ib_qp, res); 470 - struct ib_device *dev = qp->device; 471 454 struct ib_qp_init_attr qp_init_attr; 472 455 struct ib_qp_attr qp_attr; 473 456 int ret; ··· 460 475 if (ret) 461 476 return ret; 462 477 463 - if (port && port != qp_attr.port_num) 464 - return -EAGAIN; 465 - 466 - /* In create_qp() port is not set yet */ 467 - if (qp_attr.port_num && 468 - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num)) 469 - goto err; 470 - 471 - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num)) 472 - goto err; 473 478 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { 474 479 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, 475 480 qp_attr.dest_qp_num)) ··· 483 508 
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) 484 509 goto err; 485 510 486 - if (!rdma_is_kernel_res(res) && 487 - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) 488 - goto err; 489 - 490 - if (fill_res_name_pid(msg, res)) 491 - goto err; 492 - 493 - if (fill_res_entry(dev, msg, res)) 494 - goto err; 495 - 511 + if (dev->ops.fill_res_qp_entry) 512 + return dev->ops.fill_res_qp_entry(msg, qp); 496 513 return 0; 497 514 498 515 err: return -EMSGSIZE; 516 + } 517 + 518 + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, 519 + struct rdma_restrack_entry *res, uint32_t port) 520 + { 521 + struct ib_qp *qp = container_of(res, struct ib_qp, res); 522 + struct ib_device *dev = qp->device; 523 + int ret; 524 + 525 + if (port && port != qp->port) 526 + return -EAGAIN; 527 + 528 + /* In create_qp() port is not set yet */ 529 + if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) 530 + return -EINVAL; 531 + 532 + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); 533 + if (ret) 534 + return -EMSGSIZE; 535 + 536 + if (!rdma_is_kernel_res(res) && 537 + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) 538 + return -EMSGSIZE; 539 + 540 + ret = fill_res_name_pid(msg, res); 541 + if (ret) 542 + return -EMSGSIZE; 543 + 544 + return fill_res_qp_entry_query(msg, res, dev, qp); 545 + } 546 + 547 + static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 548 + struct rdma_restrack_entry *res, uint32_t port) 549 + { 550 + struct ib_qp *qp = container_of(res, struct ib_qp, res); 551 + struct ib_device *dev = qp->device; 552 + 553 + if (port && port != qp->port) 554 + return -EAGAIN; 555 + if (!dev->ops.fill_res_qp_entry_raw) 556 + return -EINVAL; 557 + return dev->ops.fill_res_qp_entry_raw(msg, qp); 499 558 } 500 559 501 560 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, ··· 577 568 if (fill_res_name_pid(msg, res)) 578 569 goto err; 579 570 580 
- if (fill_res_entry(dev, msg, res)) 581 - goto err; 582 - 571 + if (dev->ops.fill_res_cm_id_entry) 572 + return dev->ops.fill_res_cm_id_entry(msg, cm_id); 583 573 return 0; 584 574 585 575 err: return -EMSGSIZE; ··· 591 583 struct ib_device *dev = cq->device; 592 584 593 585 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) 594 - goto err; 586 + return -EMSGSIZE; 595 587 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, 596 588 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) 597 - goto err; 589 + return -EMSGSIZE; 598 590 599 591 /* Poll context is only valid for kernel CQs */ 600 592 if (rdma_is_kernel_res(res) && 601 593 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) 602 - goto err; 594 + return -EMSGSIZE; 603 595 604 596 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) 605 - goto err; 597 + return -EMSGSIZE; 606 598 607 599 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) 608 - goto err; 600 + return -EMSGSIZE; 609 601 if (!rdma_is_kernel_res(res) && 610 602 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, 611 603 cq->uobject->uevent.uobject.context->res.id)) 612 - goto err; 604 + return -EMSGSIZE; 613 605 614 606 if (fill_res_name_pid(msg, res)) 615 - goto err; 607 + return -EMSGSIZE; 616 608 617 - if (fill_res_entry(dev, msg, res)) 618 - goto err; 609 + return (dev->ops.fill_res_cq_entry) ? 
610 + dev->ops.fill_res_cq_entry(msg, cq) : 0; 611 + } 619 612 620 - return 0; 613 + static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 614 + struct rdma_restrack_entry *res, uint32_t port) 615 + { 616 + struct ib_cq *cq = container_of(res, struct ib_cq, res); 617 + struct ib_device *dev = cq->device; 621 618 622 - err: return -EMSGSIZE; 619 + if (!dev->ops.fill_res_cq_entry_raw) 620 + return -EINVAL; 621 + return dev->ops.fill_res_cq_entry_raw(msg, cq); 623 622 } 624 623 625 624 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, ··· 637 622 638 623 if (has_cap_net_admin) { 639 624 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) 640 - goto err; 625 + return -EMSGSIZE; 641 626 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) 642 - goto err; 627 + return -EMSGSIZE; 643 628 } 644 629 645 630 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 646 631 RDMA_NLDEV_ATTR_PAD)) 647 - goto err; 632 + return -EMSGSIZE; 648 633 649 634 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 650 - goto err; 635 + return -EMSGSIZE; 651 636 652 637 if (!rdma_is_kernel_res(res) && 653 638 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) 654 - goto err; 639 + return -EMSGSIZE; 655 640 656 641 if (fill_res_name_pid(msg, res)) 657 - goto err; 642 + return -EMSGSIZE; 658 643 659 - if (fill_res_entry(dev, msg, res)) 660 - goto err; 644 + return (dev->ops.fill_res_mr_entry) ? 
645 + dev->ops.fill_res_mr_entry(msg, mr) : 646 + 0; 647 + } 661 648 662 - return 0; 649 + static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, 650 + struct rdma_restrack_entry *res, uint32_t port) 651 + { 652 + struct ib_mr *mr = container_of(res, struct ib_mr, res); 653 + struct ib_device *dev = mr->pd->device; 663 654 664 - err: return -EMSGSIZE; 655 + if (!dev->ops.fill_res_mr_entry_raw) 656 + return -EINVAL; 657 + return dev->ops.fill_res_mr_entry_raw(msg, mr); 665 658 } 666 659 667 660 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, 668 661 struct rdma_restrack_entry *res, uint32_t port) 669 662 { 670 663 struct ib_pd *pd = container_of(res, struct ib_pd, res); 671 - struct ib_device *dev = pd->device; 672 664 673 665 if (has_cap_net_admin) { 674 666 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, ··· 698 676 pd->uobject->context->res.id)) 699 677 goto err; 700 678 701 - if (fill_res_name_pid(msg, res)) 702 - goto err; 703 - 704 - if (fill_res_entry(dev, msg, res)) 705 - goto err; 706 - 707 - return 0; 679 + return fill_res_name_pid(msg, res); 708 680 709 681 err: return -EMSGSIZE; 710 682 } ··· 711 695 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) 712 696 return -EMSGSIZE; 713 697 714 - if (m->mode == RDMA_COUNTER_MODE_AUTO) 698 + if (m->mode == RDMA_COUNTER_MODE_AUTO) { 715 699 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && 716 700 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) 717 701 return -EMSGSIZE; 702 + 703 + if ((m->mask & RDMA_COUNTER_MASK_PID) && 704 + fill_res_name_pid(msg, &counter->res)) 705 + return -EMSGSIZE; 706 + } 718 707 719 708 return 0; 720 709 } ··· 759 738 xa_lock(&rt->xa); 760 739 xa_for_each(&rt->xa, id, res) { 761 740 qp = container_of(res, struct ib_qp, res); 762 - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 763 - continue; 764 - 765 741 if (!qp->counter || (qp->counter->id != counter->id)) 766 742 continue; 767 743 ··· 811 793 if 
(nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) 812 794 goto err; 813 795 814 - if (fill_stat_entry(dev, msg, res)) 815 - goto err; 816 - 796 + if (dev->ops.fill_stat_mr_entry) 797 + return dev->ops.fill_stat_mr_entry(msg, mr); 817 798 return 0; 818 799 819 800 err: ··· 857 840 858 841 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || 859 842 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || 860 - fill_res_name_pid(msg, &counter->res) || 861 843 fill_stat_counter_mode(msg, counter) || 862 844 fill_stat_counter_qps(msg, counter) || 863 845 fill_stat_counter_hwcounters(msg, counter)) ··· 1193 1177 1194 1178 struct nldev_fill_res_entry { 1195 1179 enum rdma_nldev_attr nldev_attr; 1196 - enum rdma_nldev_command nldev_cmd; 1197 1180 u8 flags; 1198 1181 u32 entry; 1199 1182 u32 id; ··· 1204 1189 1205 1190 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { 1206 1191 [RDMA_RESTRACK_QP] = { 1207 - .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, 1208 1192 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, 1209 1193 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, 1210 1194 .id = RDMA_NLDEV_ATTR_RES_LQPN, 1211 1195 }, 1212 1196 [RDMA_RESTRACK_CM_ID] = { 1213 - .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, 1214 1197 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, 1215 1198 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, 1216 1199 .id = RDMA_NLDEV_ATTR_RES_CM_IDN, 1217 1200 }, 1218 1201 [RDMA_RESTRACK_CQ] = { 1219 - .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, 1220 1202 .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, 1221 1203 .flags = NLDEV_PER_DEV, 1222 1204 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, 1223 1205 .id = RDMA_NLDEV_ATTR_RES_CQN, 1224 1206 }, 1225 1207 [RDMA_RESTRACK_MR] = { 1226 - .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, 1227 1208 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, 1228 1209 .flags = NLDEV_PER_DEV, 1229 1210 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, 1230 1211 .id = RDMA_NLDEV_ATTR_RES_MRN, 1231 1212 }, 1232 1213 [RDMA_RESTRACK_PD] = { 1233 - .nldev_cmd = 
RDMA_NLDEV_CMD_RES_PD_GET, 1234 1214 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, 1235 1215 .flags = NLDEV_PER_DEV, 1236 1216 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, 1237 1217 .id = RDMA_NLDEV_ATTR_RES_PDN, 1238 1218 }, 1239 1219 [RDMA_RESTRACK_COUNTER] = { 1240 - .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, 1241 1220 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, 1242 1221 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, 1243 1222 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, ··· 1290 1281 } 1291 1282 1292 1283 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 1293 - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), 1284 + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1285 + RDMA_NL_GET_OP(nlh->nlmsg_type)), 1294 1286 0, 0); 1295 1287 1296 1288 if (fill_nldev_handle(msg, device)) { ··· 1369 1359 } 1370 1360 1371 1361 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 1372 - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), 1362 + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1363 + RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 1373 1364 0, NLM_F_MULTI); 1374 1365 1375 1366 if (fill_nldev_handle(skb, device)) { ··· 1452 1441 return ret; 1453 1442 } 1454 1443 1455 - #define RES_GET_FUNCS(name, type) \ 1456 - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ 1444 + #define RES_GET_FUNCS(name, type) \ 1445 + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ 1457 1446 struct netlink_callback *cb) \ 1458 - { \ 1459 - return res_get_common_dumpit(skb, cb, type, \ 1460 - fill_res_##name##_entry); \ 1461 - } \ 1462 - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ 1463 - struct nlmsghdr *nlh, \ 1447 + { \ 1448 + return res_get_common_dumpit(skb, cb, type, \ 1449 + fill_res_##name##_entry); \ 1450 + } \ 1451 + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ 1452 + struct nlmsghdr *nlh, \ 1464 1453 struct netlink_ext_ack *extack) \ 1465 - { \ 1466 - return res_get_common_doit(skb, nlh, extack, type, \ 1467 - fill_res_##name##_entry); \ 1454 + { \ 1455 + 
return res_get_common_doit(skb, nlh, extack, type, \ 1456 + fill_res_##name##_entry); \ 1468 1457 } 1469 1458 1470 1459 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); 1460 + RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); 1471 1461 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); 1472 1462 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); 1463 + RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); 1473 1464 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); 1474 1465 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); 1466 + RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); 1475 1467 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); 1476 1468 1477 1469 static LIST_HEAD(link_ops); ··· 2157 2143 }, 2158 2144 [RDMA_NLDEV_CMD_STAT_DEL] = { 2159 2145 .doit = nldev_stat_del_doit, 2146 + .flags = RDMA_NL_ADMIN_PERM, 2147 + }, 2148 + [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { 2149 + .doit = nldev_res_get_qp_raw_doit, 2150 + .dump = nldev_res_get_qp_raw_dumpit, 2151 + .flags = RDMA_NL_ADMIN_PERM, 2152 + }, 2153 + [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { 2154 + .doit = nldev_res_get_cq_raw_doit, 2155 + .dump = nldev_res_get_cq_raw_dumpit, 2156 + .flags = RDMA_NL_ADMIN_PERM, 2157 + }, 2158 + [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { 2159 + .doit = nldev_res_get_mr_raw_doit, 2160 + .dump = nldev_res_get_mr_raw_dumpit, 2160 2161 .flags = RDMA_NL_ADMIN_PERM, 2161 2162 }, 2162 2163 };
+41 -20
drivers/infiniband/core/sysfs.c
··· 58 58 struct ib_device *ibdev; 59 59 struct gid_attr_group *gid_attr_group; 60 60 struct attribute_group gid_group; 61 - struct attribute_group pkey_group; 61 + struct attribute_group *pkey_group; 62 62 struct attribute_group *pma_table; 63 63 struct attribute_group *hw_stats_ag; 64 64 struct rdma_hw_stats *hw_stats; ··· 681 681 kfree(p->gid_group.attrs); 682 682 } 683 683 684 - if (p->pkey_group.attrs) { 685 - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) 686 - kfree(a); 684 + if (p->pkey_group) { 685 + if (p->pkey_group->attrs) { 686 + for (i = 0; (a = p->pkey_group->attrs[i]); ++i) 687 + kfree(a); 687 688 688 - kfree(p->pkey_group.attrs); 689 + kfree(p->pkey_group->attrs); 690 + } 691 + 692 + kfree(p->pkey_group); 693 + p->pkey_group = NULL; 689 694 } 690 695 691 696 kfree(p); ··· 1123 1118 if (ret) 1124 1119 goto err_free_gid_type; 1125 1120 1126 - p->pkey_group.name = "pkeys"; 1127 - p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, 1128 - attr.pkey_tbl_len); 1129 - if (!p->pkey_group.attrs) { 1130 - ret = -ENOMEM; 1131 - goto err_remove_gid_type; 1121 + if (attr.pkey_tbl_len) { 1122 + p->pkey_group = kzalloc(sizeof(*p->pkey_group), GFP_KERNEL); 1123 + if (!p->pkey_group) { 1124 + ret = -ENOMEM; 1125 + goto err_remove_gid_type; 1126 + } 1127 + 1128 + p->pkey_group->name = "pkeys"; 1129 + p->pkey_group->attrs = alloc_group_attrs(show_port_pkey, 1130 + attr.pkey_tbl_len); 1131 + if (!p->pkey_group->attrs) { 1132 + ret = -ENOMEM; 1133 + goto err_free_pkey_group; 1134 + } 1135 + 1136 + ret = sysfs_create_group(&p->kobj, p->pkey_group); 1137 + if (ret) 1138 + goto err_free_pkey; 1132 1139 } 1133 1140 1134 - ret = sysfs_create_group(&p->kobj, &p->pkey_group); 1135 - if (ret) 1136 - goto err_free_pkey; 1137 1141 1138 1142 if (device->ops.init_port && is_full_dev) { 1139 1143 ret = device->ops.init_port(device, port_num, &p->kobj); ··· 1164 1150 return 0; 1165 1151 1166 1152 err_remove_pkey: 1167 - sysfs_remove_group(&p->kobj, &p->pkey_group); 1153 + if 
(p->pkey_group) 1154 + sysfs_remove_group(&p->kobj, p->pkey_group); 1168 1155 1169 1156 err_free_pkey: 1170 - for (i = 0; i < attr.pkey_tbl_len; ++i) 1171 - kfree(p->pkey_group.attrs[i]); 1157 + if (p->pkey_group) { 1158 + for (i = 0; i < attr.pkey_tbl_len; ++i) 1159 + kfree(p->pkey_group->attrs[i]); 1172 1160 1173 - kfree(p->pkey_group.attrs); 1174 - p->pkey_group.attrs = NULL; 1161 + kfree(p->pkey_group->attrs); 1162 + p->pkey_group->attrs = NULL; 1163 + } 1164 + 1165 + err_free_pkey_group: 1166 + kfree(p->pkey_group); 1175 1167 1176 1168 err_remove_gid_type: 1177 1169 sysfs_remove_group(&p->gid_attr_group->kobj, ··· 1337 1317 1338 1318 if (port->pma_table) 1339 1319 sysfs_remove_group(p, port->pma_table); 1340 - sysfs_remove_group(p, &port->pkey_group); 1320 + if (port->pkey_group) 1321 + sysfs_remove_group(p, port->pkey_group); 1341 1322 sysfs_remove_group(p, &port->gid_group); 1342 1323 sysfs_remove_group(&port->gid_attr_group->kobj, 1343 1324 &port->gid_attr_group->ndev);
-2
drivers/infiniband/core/trace.c
··· 9 9 10 10 #define CREATE_TRACE_POINTS 11 11 12 - #include <rdma/ib_verbs.h> 13 - 14 12 #include <trace/events/rdma_core.h>
+1
drivers/infiniband/core/umem.c
··· 261 261 sg = umem->sg_head.sgl; 262 262 263 263 while (npages) { 264 + cond_resched(); 264 265 ret = pin_user_pages_fast(cur_base, 265 266 min_t(unsigned long, npages, 266 267 PAGE_SIZE /
+2
drivers/infiniband/core/umem_odp.c
··· 152 152 * ib_alloc_implicit_odp_umem() 153 153 * @addr: The starting userspace VA 154 154 * @size: The length of the userspace VA 155 + * @ops: MMU interval ops, currently only @invalidate 155 156 */ 156 157 struct ib_umem_odp * 157 158 ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, ··· 214 213 * @addr: userspace virtual address to start at 215 214 * @size: length of region to pin 216 215 * @access: IB_ACCESS_xxx flags for memory being pinned 216 + * @ops: MMU interval ops, currently only @invalidate 217 217 * 218 218 * The driver should use when the access flags indicate ODP memory. It avoids 219 219 * pinning, instead, stores the mm for future page fault handling in
+100 -223
drivers/infiniband/core/uverbs_cmd.c
··· 415 415 416 416 static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) 417 417 { 418 + struct ib_uverbs_alloc_pd_resp resp = {}; 418 419 struct ib_uverbs_alloc_pd cmd; 419 - struct ib_uverbs_alloc_pd_resp resp; 420 420 struct ib_uobject *uobj; 421 421 struct ib_pd *pd; 422 422 int ret; ··· 438 438 439 439 pd->device = ib_dev; 440 440 pd->uobject = uobj; 441 - pd->__internal_mr = NULL; 442 441 atomic_set(&pd->usecnt, 0); 443 442 pd->res.type = RDMA_RESTRACK_PD; 444 443 445 444 ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); 446 445 if (ret) 447 446 goto err_alloc; 448 - 449 - uobj->object = pd; 450 - memset(&resp, 0, sizeof resp); 451 - resp.pd_handle = uobj->id; 452 447 rdma_restrack_uadd(&pd->res); 453 448 454 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 455 - if (ret) 456 - goto err_copy; 449 + uobj->object = pd; 450 + uobj_finalize_uobj_create(uobj, attrs); 457 451 458 - rdma_alloc_commit_uobject(uobj, attrs); 459 - return 0; 452 + resp.pd_handle = uobj->id; 453 + return uverbs_response(attrs, &resp, sizeof(resp)); 460 454 461 - err_copy: 462 - ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); 463 - pd = NULL; 464 455 err_alloc: 465 456 kfree(pd); 466 457 err: ··· 559 568 static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) 560 569 { 561 570 struct ib_uverbs_device *ibudev = attrs->ufile->device; 571 + struct ib_uverbs_open_xrcd_resp resp = {}; 562 572 struct ib_uverbs_open_xrcd cmd; 563 - struct ib_uverbs_open_xrcd_resp resp; 564 573 struct ib_uxrcd_object *obj; 565 574 struct ib_xrcd *xrcd = NULL; 566 - struct fd f = {NULL, 0}; 567 575 struct inode *inode = NULL; 568 - int ret = 0; 569 576 int new_xrcd = 0; 570 577 struct ib_device *ib_dev; 578 + struct fd f = {}; 579 + int ret; 571 580 572 581 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); 573 582 if (ret) ··· 605 614 } 606 615 607 616 if (!xrcd) { 608 - xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata); 617 + xrcd = ib_alloc_xrcd_user(ib_dev, inode, 
&attrs->driver_udata); 609 618 if (IS_ERR(xrcd)) { 610 619 ret = PTR_ERR(xrcd); 611 620 goto err; 612 621 } 613 - 614 - xrcd->inode = inode; 615 - xrcd->device = ib_dev; 616 - atomic_set(&xrcd->usecnt, 0); 617 - mutex_init(&xrcd->tgt_qp_mutex); 618 - INIT_LIST_HEAD(&xrcd->tgt_qp_list); 619 622 new_xrcd = 1; 620 623 } 621 624 622 625 atomic_set(&obj->refcnt, 0); 623 626 obj->uobject.object = xrcd; 624 - memset(&resp, 0, sizeof resp); 625 - resp.xrcd_handle = obj->uobject.id; 626 627 627 628 if (inode) { 628 629 if (new_xrcd) { ··· 626 643 atomic_inc(&xrcd->usecnt); 627 644 } 628 645 629 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 630 - if (ret) 631 - goto err_copy; 632 - 633 646 if (f.file) 634 647 fdput(f); 635 648 636 649 mutex_unlock(&ibudev->xrcd_tree_mutex); 650 + uobj_finalize_uobj_create(&obj->uobject, attrs); 637 651 638 - rdma_alloc_commit_uobject(&obj->uobject, attrs); 639 - return 0; 640 - 641 - err_copy: 642 - if (inode) { 643 - if (new_xrcd) 644 - xrcd_table_delete(ibudev, inode); 645 - atomic_dec(&xrcd->usecnt); 646 - } 652 + resp.xrcd_handle = obj->uobject.id; 653 + return uverbs_response(attrs, &resp, sizeof(resp)); 647 654 648 655 err_dealloc_xrcd: 649 - ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); 656 + ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs)); 650 657 651 658 err: 652 659 uobj_alloc_abort(&obj->uobject, attrs); ··· 674 701 if (inode && !atomic_dec_and_test(&xrcd->usecnt)) 675 702 return 0; 676 703 677 - ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); 704 + ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); 678 705 679 706 if (ib_is_destroy_retryable(ret, why, uobject)) { 680 707 atomic_inc(&xrcd->usecnt); ··· 689 716 690 717 static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) 691 718 { 719 + struct ib_uverbs_reg_mr_resp resp = {}; 692 720 struct ib_uverbs_reg_mr cmd; 693 - struct ib_uverbs_reg_mr_resp resp; 694 721 struct ib_uobject *uobj; 695 722 struct ib_pd *pd; 696 723 struct ib_mr *mr; 
··· 743 770 mr->uobject = uobj; 744 771 atomic_inc(&pd->usecnt); 745 772 mr->res.type = RDMA_RESTRACK_MR; 773 + mr->iova = cmd.hca_va; 746 774 rdma_restrack_uadd(&mr->res); 747 775 748 776 uobj->object = mr; 749 - 750 - memset(&resp, 0, sizeof resp); 751 - resp.lkey = mr->lkey; 752 - resp.rkey = mr->rkey; 753 - resp.mr_handle = uobj->id; 754 - 755 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 756 - if (ret) 757 - goto err_copy; 758 - 759 777 uobj_put_obj_read(pd); 778 + uobj_finalize_uobj_create(uobj, attrs); 760 779 761 - rdma_alloc_commit_uobject(uobj, attrs); 762 - return 0; 763 - 764 - err_copy: 765 - ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); 780 + resp.lkey = mr->lkey; 781 + resp.rkey = mr->rkey; 782 + resp.mr_handle = uobj->id; 783 + return uverbs_response(attrs, &resp, sizeof(resp)); 766 784 767 785 err_put: 768 786 uobj_put_obj_read(pd); 769 - 770 787 err_free: 771 788 uobj_alloc_abort(uobj, attrs); 772 789 return ret; ··· 823 860 mr->pd = pd; 824 861 atomic_dec(&old_pd->usecnt); 825 862 } 863 + 864 + if (cmd.flags & IB_MR_REREG_TRANS) 865 + mr->iova = cmd.hca_va; 826 866 827 867 memset(&resp, 0, sizeof(resp)); 828 868 resp.lkey = mr->lkey; ··· 896 930 atomic_inc(&pd->usecnt); 897 931 898 932 uobj->object = mw; 899 - 900 - memset(&resp, 0, sizeof(resp)); 901 - resp.rkey = mw->rkey; 902 - resp.mw_handle = uobj->id; 903 - 904 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 905 - if (ret) 906 - goto err_copy; 907 - 908 933 uobj_put_obj_read(pd); 909 - rdma_alloc_commit_uobject(uobj, attrs); 910 - return 0; 934 + uobj_finalize_uobj_create(uobj, attrs); 911 935 912 - err_copy: 913 - uverbs_dealloc_mw(mw); 936 + resp.rkey = mw->rkey; 937 + resp.mw_handle = uobj->id; 938 + return uverbs_response(attrs, &resp, sizeof(resp)); 939 + 914 940 err_put: 915 941 uobj_put_obj_read(pd); 916 942 err_free: ··· 939 981 if (IS_ERR(uobj)) 940 982 return PTR_ERR(uobj); 941 983 942 - resp.fd = uobj->id; 943 - 944 984 ev_file = container_of(uobj, struct 
ib_uverbs_completion_event_file, 945 985 uobj); 946 986 ib_uverbs_init_event_queue(&ev_file->ev_queue); 987 + uobj_finalize_uobj_create(uobj, attrs); 947 988 948 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 949 - if (ret) { 950 - uobj_alloc_abort(uobj, attrs); 951 - return ret; 952 - } 953 - 954 - rdma_alloc_commit_uobject(uobj, attrs); 955 - return 0; 989 + resp.fd = uobj->id; 990 + return uverbs_response(attrs, &resp, sizeof(resp)); 956 991 } 957 992 958 - static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, 959 - struct ib_uverbs_ex_create_cq *cmd) 993 + static int create_cq(struct uverbs_attr_bundle *attrs, 994 + struct ib_uverbs_ex_create_cq *cmd) 960 995 { 961 996 struct ib_ucq_object *obj; 962 997 struct ib_uverbs_completion_event_file *ev_file = NULL; 963 998 struct ib_cq *cq; 964 999 int ret; 965 - struct ib_uverbs_ex_create_cq_resp resp; 1000 + struct ib_uverbs_ex_create_cq_resp resp = {}; 966 1001 struct ib_cq_init_attr attr = {}; 967 1002 struct ib_device *ib_dev; 968 1003 969 1004 if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) 970 - return ERR_PTR(-EINVAL); 1005 + return -EINVAL; 971 1006 972 1007 obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, 973 1008 &ib_dev); 974 1009 if (IS_ERR(obj)) 975 - return obj; 1010 + return PTR_ERR(obj); 976 1011 977 1012 if (cmd->comp_channel >= 0) { 978 1013 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); ··· 994 1043 cq->event_handler = ib_uverbs_cq_event_handler; 995 1044 cq->cq_context = ev_file ? 
&ev_file->ev_queue : NULL; 996 1045 atomic_set(&cq->usecnt, 0); 1046 + cq->res.type = RDMA_RESTRACK_CQ; 997 1047 998 1048 ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); 999 1049 if (ret) 1000 1050 goto err_free; 1051 + rdma_restrack_uadd(&cq->res); 1001 1052 1002 1053 obj->uevent.uobject.object = cq; 1003 1054 obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); 1004 1055 if (obj->uevent.event_file) 1005 1056 uverbs_uobject_get(&obj->uevent.event_file->uobj); 1057 + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); 1006 1058 1007 - memset(&resp, 0, sizeof resp); 1008 1059 resp.base.cq_handle = obj->uevent.uobject.id; 1009 - resp.base.cqe = cq->cqe; 1060 + resp.base.cqe = cq->cqe; 1010 1061 resp.response_length = uverbs_response_length(attrs, sizeof(resp)); 1062 + return uverbs_response(attrs, &resp, sizeof(resp)); 1011 1063 1012 - cq->res.type = RDMA_RESTRACK_CQ; 1013 - rdma_restrack_uadd(&cq->res); 1014 - 1015 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 1016 - if (ret) 1017 - goto err_cb; 1018 - 1019 - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); 1020 - return obj; 1021 - 1022 - err_cb: 1023 - if (obj->uevent.event_file) 1024 - uverbs_uobject_put(&obj->uevent.event_file->uobj); 1025 - ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); 1026 - cq = NULL; 1027 1064 err_free: 1028 1065 kfree(cq); 1029 1066 err_file: 1030 1067 if (ev_file) 1031 1068 ib_uverbs_release_ucq(ev_file, obj); 1032 - 1033 1069 err: 1034 1070 uobj_alloc_abort(&obj->uevent.uobject, attrs); 1035 - 1036 - return ERR_PTR(ret); 1071 + return ret; 1037 1072 } 1038 1073 1039 1074 static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) 1040 1075 { 1041 1076 struct ib_uverbs_create_cq cmd; 1042 1077 struct ib_uverbs_ex_create_cq cmd_ex; 1043 - struct ib_ucq_object *obj; 1044 1078 int ret; 1045 1079 1046 1080 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); ··· 1038 1102 cmd_ex.comp_vector = cmd.comp_vector; 1039 1103 
cmd_ex.comp_channel = cmd.comp_channel; 1040 1104 1041 - obj = create_cq(attrs, &cmd_ex); 1042 - return PTR_ERR_OR_ZERO(obj); 1105 + return create_cq(attrs, &cmd_ex); 1043 1106 } 1044 1107 1045 1108 static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) 1046 1109 { 1047 1110 struct ib_uverbs_ex_create_cq cmd; 1048 - struct ib_ucq_object *obj; 1049 1111 int ret; 1050 1112 1051 1113 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); ··· 1056 1122 if (cmd.reserved) 1057 1123 return -EINVAL; 1058 1124 1059 - obj = create_cq(attrs, &cmd); 1060 - return PTR_ERR_OR_ZERO(obj); 1125 + return create_cq(attrs, &cmd); 1061 1126 } 1062 1127 1063 1128 static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) ··· 1064 1131 struct ib_uverbs_resize_cq cmd; 1065 1132 struct ib_uverbs_resize_cq_resp resp = {}; 1066 1133 struct ib_cq *cq; 1067 - int ret = -EINVAL; 1134 + int ret; 1068 1135 1069 1136 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); 1070 1137 if (ret) ··· 1231 1298 struct ib_srq *srq = NULL; 1232 1299 struct ib_qp *qp; 1233 1300 struct ib_qp_init_attr attr = {}; 1234 - struct ib_uverbs_ex_create_qp_resp resp; 1301 + struct ib_uverbs_ex_create_qp_resp resp = {}; 1235 1302 int ret; 1236 1303 struct ib_rwq_ind_table *ind_tbl = NULL; 1237 1304 bool has_sq = true; ··· 1401 1468 if (obj->uevent.event_file) 1402 1469 uverbs_uobject_get(&obj->uevent.event_file->uobj); 1403 1470 1404 - memset(&resp, 0, sizeof resp); 1405 - resp.base.qpn = qp->qp_num; 1406 - resp.base.qp_handle = obj->uevent.uobject.id; 1407 - resp.base.max_recv_sge = attr.cap.max_recv_sge; 1408 - resp.base.max_send_sge = attr.cap.max_send_sge; 1409 - resp.base.max_recv_wr = attr.cap.max_recv_wr; 1410 - resp.base.max_send_wr = attr.cap.max_send_wr; 1411 - resp.base.max_inline_data = attr.cap.max_inline_data; 1412 - resp.response_length = uverbs_response_length(attrs, sizeof(resp)); 1413 - 1414 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 1415 - if (ret) 1416 - goto err_uevent; 1417 - 1418 
1471 if (xrcd) { 1419 1472 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, 1420 1473 uobject); ··· 1421 1502 UVERBS_LOOKUP_READ); 1422 1503 if (ind_tbl) 1423 1504 uobj_put_obj_read(ind_tbl); 1505 + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); 1424 1506 1425 - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); 1426 - return 0; 1427 - err_uevent: 1428 - if (obj->uevent.event_file) 1429 - uverbs_uobject_put(&obj->uevent.event_file->uobj); 1507 + resp.base.qpn = qp->qp_num; 1508 + resp.base.qp_handle = obj->uevent.uobject.id; 1509 + resp.base.max_recv_sge = attr.cap.max_recv_sge; 1510 + resp.base.max_send_sge = attr.cap.max_send_sge; 1511 + resp.base.max_recv_wr = attr.cap.max_recv_wr; 1512 + resp.base.max_send_wr = attr.cap.max_send_wr; 1513 + resp.base.max_inline_data = attr.cap.max_inline_data; 1514 + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); 1515 + return uverbs_response(attrs, &resp, sizeof(resp)); 1516 + 1430 1517 err_cb: 1431 1518 ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); 1432 1519 ··· 1505 1580 1506 1581 static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) 1507 1582 { 1583 + struct ib_uverbs_create_qp_resp resp = {}; 1508 1584 struct ib_uverbs_open_qp cmd; 1509 - struct ib_uverbs_create_qp_resp resp; 1510 1585 struct ib_uqp_object *obj; 1511 1586 struct ib_xrcd *xrcd; 1512 - struct ib_uobject *xrcd_uobj; 1513 1587 struct ib_qp *qp; 1514 1588 struct ib_qp_open_attr attr = {}; 1515 1589 int ret; 1590 + struct ib_uobject *xrcd_uobj; 1516 1591 struct ib_device *ib_dev; 1517 1592 1518 1593 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); ··· 1552 1627 obj->uevent.uobject.object = qp; 1553 1628 obj->uevent.uobject.user_handle = cmd.user_handle; 1554 1629 1555 - memset(&resp, 0, sizeof resp); 1556 - resp.qpn = qp->qp_num; 1557 - resp.qp_handle = obj->uevent.uobject.id; 1558 - 1559 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 1560 - if (ret) 1561 - goto err_destroy; 1562 - 1563 1630 
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); 1564 1631 atomic_inc(&obj->uxrcd->refcnt); 1565 1632 qp->uobject = obj; 1566 1633 uobj_put_read(xrcd_uobj); 1634 + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); 1567 1635 1568 - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); 1569 - return 0; 1636 + resp.qpn = qp->qp_num; 1637 + resp.qp_handle = obj->uevent.uobject.id; 1638 + return uverbs_response(attrs, &resp, sizeof(resp)); 1570 1639 1571 - err_destroy: 1572 - ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); 1573 1640 err_xrcd: 1574 1641 uobj_put_read(xrcd_uobj); 1575 1642 err_put: ··· 1897 1980 * Last bit is reserved for extending the attr_mask by 1898 1981 * using another field. 1899 1982 */ 1900 - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); 1983 + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); 1901 1984 1902 1985 if (cmd.base.attr_mask & 1903 1986 ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) ··· 2397 2480 ah->uobject = uobj; 2398 2481 uobj->user_handle = cmd.user_handle; 2399 2482 uobj->object = ah; 2483 + uobj_put_obj_read(pd); 2484 + uobj_finalize_uobj_create(uobj, attrs); 2400 2485 2401 2486 resp.ah_handle = uobj->id; 2402 - 2403 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 2404 - if (ret) 2405 - goto err_copy; 2406 - 2407 - uobj_put_obj_read(pd); 2408 - rdma_alloc_commit_uobject(uobj, attrs); 2409 - return 0; 2410 - 2411 - err_copy: 2412 - rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, 2413 - uverbs_get_cleared_udata(attrs)); 2487 + return uverbs_response(attrs, &resp, sizeof(resp)); 2414 2488 2415 2489 err_put: 2416 2490 uobj_put_obj_read(pd); 2417 - 2418 2491 err: 2419 2492 uobj_alloc_abort(uobj, attrs); 2420 2493 return ret; ··· 2896 2989 if (obj->uevent.event_file) 2897 2990 uverbs_uobject_get(&obj->uevent.event_file->uobj); 2898 2991 2899 - memset(&resp, 0, sizeof(resp)); 2992 + uobj_put_obj_read(pd); 2993 + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, 2994 + 
UVERBS_LOOKUP_READ); 2995 + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); 2996 + 2900 2997 resp.wq_handle = obj->uevent.uobject.id; 2901 2998 resp.max_sge = wq_init_attr.max_sge; 2902 2999 resp.max_wr = wq_init_attr.max_wr; 2903 3000 resp.wqn = wq->wq_num; 2904 3001 resp.response_length = uverbs_response_length(attrs, sizeof(resp)); 2905 - err = uverbs_response(attrs, &resp, sizeof(resp)); 2906 - if (err) 2907 - goto err_copy; 3002 + return uverbs_response(attrs, &resp, sizeof(resp)); 2908 3003 2909 - uobj_put_obj_read(pd); 2910 - rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, 2911 - UVERBS_LOOKUP_READ); 2912 - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); 2913 - return 0; 2914 - 2915 - err_copy: 2916 - if (obj->uevent.event_file) 2917 - uverbs_uobject_put(&obj->uevent.event_file->uobj); 2918 - ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); 2919 3004 err_put_cq: 2920 3005 rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, 2921 3006 UVERBS_LOOKUP_READ); ··· 2992 3093 struct ib_wq **wqs = NULL; 2993 3094 u32 *wqs_handles = NULL; 2994 3095 struct ib_wq *wq = NULL; 2995 - int i, j, num_read_wqs; 3096 + int i, num_read_wqs; 2996 3097 u32 num_wq_handles; 2997 3098 struct uverbs_req_iter iter; 2998 3099 struct ib_device *ib_dev; ··· 3038 3139 } 3039 3140 3040 3141 wqs[num_read_wqs] = wq; 3142 + atomic_inc(&wqs[num_read_wqs]->usecnt); 3041 3143 } 3042 3144 3043 3145 uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); ··· 3066 3166 atomic_set(&rwq_ind_tbl->usecnt, 0); 3067 3167 3068 3168 for (i = 0; i < num_wq_handles; i++) 3069 - atomic_inc(&wqs[i]->usecnt); 3169 + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, 3170 + UVERBS_LOOKUP_READ); 3171 + kfree(wqs_handles); 3172 + uobj_finalize_uobj_create(uobj, attrs); 3070 3173 3071 3174 resp.ind_tbl_handle = uobj->id; 3072 3175 resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; 3073 3176 resp.response_length = uverbs_response_length(attrs, sizeof(resp)); 3177 + return 
uverbs_response(attrs, &resp, sizeof(resp)); 3074 3178 3075 - err = uverbs_response(attrs, &resp, sizeof(resp)); 3076 - if (err) 3077 - goto err_copy; 3078 - 3079 - kfree(wqs_handles); 3080 - 3081 - for (j = 0; j < num_read_wqs; j++) 3082 - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, 3083 - UVERBS_LOOKUP_READ); 3084 - 3085 - rdma_alloc_commit_uobject(uobj, attrs); 3086 - return 0; 3087 - 3088 - err_copy: 3089 - ib_destroy_rwq_ind_table(rwq_ind_tbl); 3090 3179 err_uobj: 3091 3180 uobj_alloc_abort(uobj, attrs); 3092 3181 put_wqs: 3093 - for (j = 0; j < num_read_wqs; j++) 3094 - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, 3182 + for (i = 0; i < num_read_wqs; i++) { 3183 + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, 3095 3184 UVERBS_LOOKUP_READ); 3185 + atomic_dec(&wqs[i]->usecnt); 3186 + } 3096 3187 err_free: 3097 3188 kfree(wqs_handles); 3098 3189 kfree(wqs); ··· 3109 3218 static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) 3110 3219 { 3111 3220 struct ib_uverbs_create_flow cmd; 3112 - struct ib_uverbs_create_flow_resp resp; 3221 + struct ib_uverbs_create_flow_resp resp = {}; 3113 3222 struct ib_uobject *uobj; 3114 3223 struct ib_flow *flow_id; 3115 3224 struct ib_uverbs_flow_attr *kern_flow_attr; ··· 3242 3351 3243 3352 ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); 3244 3353 3245 - memset(&resp, 0, sizeof(resp)); 3246 - resp.flow_handle = uobj->id; 3247 - 3248 - err = uverbs_response(attrs, &resp, sizeof(resp)); 3249 - if (err) 3250 - goto err_copy; 3251 - 3252 3354 rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, 3253 3355 UVERBS_LOOKUP_READ); 3254 3356 kfree(flow_attr); 3357 + 3255 3358 if (cmd.flow_attr.num_of_specs) 3256 3359 kfree(kern_flow_attr); 3257 - rdma_alloc_commit_uobject(uobj, attrs); 3258 - return 0; 3259 - err_copy: 3260 - if (!qp->device->ops.destroy_flow(flow_id)) 3261 - atomic_dec(&qp->usecnt); 3360 + uobj_finalize_uobj_create(uobj, attrs); 3361 + 3362 + resp.flow_handle = 
uobj->id; 3363 + return uverbs_response(attrs, &resp, sizeof(resp)); 3364 + 3262 3365 err_free: 3263 3366 ib_uverbs_flow_resources_free(uflow_res); 3264 3367 err_free_flow_attr: ··· 3287 3402 struct ib_uverbs_create_xsrq *cmd, 3288 3403 struct ib_udata *udata) 3289 3404 { 3290 - struct ib_uverbs_create_srq_resp resp; 3405 + struct ib_uverbs_create_srq_resp resp = {}; 3291 3406 struct ib_usrq_object *obj; 3292 3407 struct ib_pd *pd; 3293 3408 struct ib_srq *srq; 3294 - struct ib_uobject *xrcd_uobj; 3295 3409 struct ib_srq_init_attr attr; 3296 3410 int ret; 3411 + struct ib_uobject *xrcd_uobj; 3297 3412 struct ib_device *ib_dev; 3298 3413 3299 3414 obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, ··· 3358 3473 if (obj->uevent.event_file) 3359 3474 uverbs_uobject_get(&obj->uevent.event_file->uobj); 3360 3475 3361 - memset(&resp, 0, sizeof resp); 3362 - resp.srq_handle = obj->uevent.uobject.id; 3363 - resp.max_wr = attr.attr.max_wr; 3364 - resp.max_sge = attr.attr.max_sge; 3365 3476 if (cmd->srq_type == IB_SRQT_XRC) 3366 3477 resp.srqn = srq->ext.xrc.srq_num; 3367 - 3368 - ret = uverbs_response(attrs, &resp, sizeof(resp)); 3369 - if (ret) 3370 - goto err_copy; 3371 3478 3372 3479 if (cmd->srq_type == IB_SRQT_XRC) 3373 3480 uobj_put_read(xrcd_uobj); ··· 3369 3492 UVERBS_LOOKUP_READ); 3370 3493 3371 3494 uobj_put_obj_read(pd); 3372 - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); 3373 - return 0; 3495 + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); 3374 3496 3375 - err_copy: 3376 - if (obj->uevent.event_file) 3377 - uverbs_uobject_put(&obj->uevent.event_file->uobj); 3378 - ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); 3497 + resp.srq_handle = obj->uevent.uobject.id; 3498 + resp.max_wr = attr.attr.max_wr; 3499 + resp.max_sge = attr.attr.max_sge; 3500 + return uverbs_response(attrs, &resp, sizeof(resp)); 3501 + 3379 3502 err_put_pd: 3380 3503 uobj_put_obj_read(pd); 3381 3504 err_put_cq:
+1
drivers/infiniband/core/uverbs_ioctl.c
··· 790 790 } 791 791 return uverbs_copy_to(bundle, idx, from, size); 792 792 } 793 + EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero); 793 794 794 795 /* Once called an abort will call through to the type's destroy_hw() */ 795 796 void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
+4
drivers/infiniband/core/uverbs_main.c
··· 601 601 memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); 602 602 bundle.ufile = file; 603 603 bundle.context = NULL; /* only valid if bundle has uobject */ 604 + bundle.uobject = NULL; 604 605 if (!method_elm->is_ex) { 605 606 size_t in_len = hdr.in_words * 4 - sizeof(hdr); 606 607 size_t out_len = hdr.out_words * 4; ··· 665 664 } 666 665 667 666 ret = method_elm->handler(&bundle); 667 + if (bundle.uobject) 668 + uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true, 669 + !ret, &bundle); 668 670 out_unlock: 669 671 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 670 672 return (ret) ? : count;
+9 -8
drivers/infiniband/core/uverbs_std_types_counters.c
··· 46 46 if (ret) 47 47 return ret; 48 48 49 - return counters->device->ops.destroy_counters(counters); 49 + counters->device->ops.destroy_counters(counters); 50 + kfree(counters); 51 + return 0; 50 52 } 51 53 52 54 static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( ··· 68 66 if (!ib_dev->ops.create_counters) 69 67 return -EOPNOTSUPP; 70 68 71 - counters = ib_dev->ops.create_counters(ib_dev, attrs); 72 - if (IS_ERR(counters)) { 73 - ret = PTR_ERR(counters); 74 - goto err_create_counters; 75 - } 69 + counters = rdma_zalloc_drv_obj(ib_dev, ib_counters); 70 + if (!counters) 71 + return -ENOMEM; 76 72 77 73 counters->device = ib_dev; 78 74 counters->uobject = uobj; 79 75 uobj->object = counters; 80 76 atomic_set(&counters->usecnt, 0); 81 77 82 - return 0; 78 + ret = ib_dev->ops.create_counters(counters, attrs); 79 + if (ret) 80 + kfree(counters); 83 81 84 - err_create_counters: 85 82 return ret; 86 83 } 87 84
-3
drivers/infiniband/core/uverbs_std_types_cq.c
··· 207 207 DECLARE_UVERBS_NAMED_OBJECT( 208 208 UVERBS_OBJECT_CQ, 209 209 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), 210 - 211 - #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) 212 210 &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), 213 211 &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) 214 - #endif 215 212 ); 216 213 217 214 const struct uapi_definition uverbs_def_obj_cq[] = {
+46 -2
drivers/infiniband/core/uverbs_std_types_device.c
··· 38 38 attrs->ucore.outlen < method_elm->resp_size) 39 39 return -ENOSPC; 40 40 41 - return method_elm->handler(attrs); 41 + attrs->uobject = NULL; 42 + rc = method_elm->handler(attrs); 43 + if (attrs->uobject) 44 + uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true, 45 + !rc, attrs); 46 + return rc; 42 47 } 43 48 44 49 DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, ··· 234 229 return 0; 235 230 } 236 231 232 + static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)( 233 + struct uverbs_attr_bundle *attrs) 234 + { 235 + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; 236 + struct ib_ucontext *ucontext; 237 + struct ib_device *ib_dev; 238 + u32 num_comp; 239 + int ret; 240 + 241 + ucontext = ib_uverbs_get_ucontext(attrs); 242 + if (IS_ERR(ucontext)) 243 + return PTR_ERR(ucontext); 244 + ib_dev = ucontext->device; 245 + 246 + if (!ib_dev->ops.query_ucontext) 247 + return -EOPNOTSUPP; 248 + 249 + num_comp = attrs->ufile->device->num_comp_vectors; 250 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, 251 + &num_comp, sizeof(num_comp)); 252 + if (IS_UVERBS_COPY_ERR(ret)) 253 + return ret; 254 + 255 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, 256 + &core_support, sizeof(core_support)); 257 + if (IS_UVERBS_COPY_ERR(ret)) 258 + return ret; 259 + 260 + return ucontext->device->ops.query_ucontext(ucontext, attrs); 261 + } 262 + 237 263 DECLARE_UVERBS_NAMED_METHOD( 238 264 UVERBS_METHOD_GET_CONTEXT, 239 265 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, ··· 272 236 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, 273 237 UVERBS_ATTR_TYPE(u64), UA_OPTIONAL), 274 238 UVERBS_ATTR_UHW()); 239 + 240 + DECLARE_UVERBS_NAMED_METHOD( 241 + UVERBS_METHOD_QUERY_CONTEXT, 242 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, 243 + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), 244 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, 245 + UVERBS_ATTR_TYPE(u64), 
UA_OPTIONAL)); 275 246 276 247 DECLARE_UVERBS_NAMED_METHOD( 277 248 UVERBS_METHOD_INFO_HANDLES, ··· 303 260 &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), 304 261 &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), 305 262 &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), 306 - &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); 263 + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT), 264 + &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT)); 307 265 308 266 const struct uapi_definition uverbs_def_obj_device[] = { 309 267 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+52 -2
drivers/infiniband/core/uverbs_std_types_mr.c
··· 69 69 70 70 num_sge = uverbs_attr_ptr_get_array_size( 71 71 attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); 72 - if (num_sge < 0) 72 + if (num_sge <= 0) 73 73 return num_sge; 74 74 75 75 sg_list = uverbs_attr_get_alloced_ptr(attrs, ··· 148 148 return ret; 149 149 } 150 150 151 + static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( 152 + struct uverbs_attr_bundle *attrs) 153 + { 154 + struct ib_mr *mr = 155 + uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE); 156 + int ret; 157 + 158 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey, 159 + sizeof(mr->lkey)); 160 + if (ret) 161 + return ret; 162 + 163 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY, 164 + &mr->rkey, sizeof(mr->rkey)); 165 + 166 + if (ret) 167 + return ret; 168 + 169 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH, 170 + &mr->length, sizeof(mr->length)); 171 + 172 + if (ret) 173 + return ret; 174 + 175 + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA, 176 + &mr->iova, sizeof(mr->iova)); 177 + 178 + return IS_UVERBS_COPY_ERR(ret) ? 
ret : 0; 179 + } 180 + 151 181 DECLARE_UVERBS_NAMED_METHOD( 152 182 UVERBS_METHOD_ADVISE_MR, 153 183 UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, ··· 194 164 UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)), 195 165 UA_MANDATORY, 196 166 UA_ALLOC_AND_COPY)); 167 + 168 + DECLARE_UVERBS_NAMED_METHOD( 169 + UVERBS_METHOD_QUERY_MR, 170 + UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE, 171 + UVERBS_OBJECT_MR, 172 + UVERBS_ACCESS_READ, 173 + UA_MANDATORY), 174 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY, 175 + UVERBS_ATTR_TYPE(u32), 176 + UA_MANDATORY), 177 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY, 178 + UVERBS_ATTR_TYPE(u32), 179 + UA_MANDATORY), 180 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH, 181 + UVERBS_ATTR_TYPE(u64), 182 + UA_MANDATORY), 183 + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA, 184 + UVERBS_ATTR_TYPE(u64), 185 + UA_OPTIONAL)); 197 186 198 187 DECLARE_UVERBS_NAMED_METHOD( 199 188 UVERBS_METHOD_DM_MR_REG, ··· 255 206 UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), 256 207 &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), 257 208 &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), 258 - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); 209 + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), 210 + &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); 259 211 260 212 const struct uapi_definition uverbs_def_obj_mr[] = { 261 213 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+78 -107
drivers/infiniband/core/verbs.c
··· 988 988 * @srq_init_attr: A list of initial attributes required to create the 989 989 * SRQ. If SRQ creation succeeds, then the attributes are updated to 990 990 * the actual capabilities of the created SRQ. 991 - * @uobject - uobject pointer if this is not a kernel SRQ 992 - * @udata - udata pointer if this is not a kernel SRQ 991 + * @uobject: uobject pointer if this is not a kernel SRQ 992 + * @udata: udata pointer if this is not a kernel SRQ 993 993 * 994 994 * srq_attr->max_wr and srq_attr->max_sge are read the determine the 995 995 * requested size of the SRQ, and set to the actual values allocated ··· 1090 1090 spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); 1091 1091 } 1092 1092 1093 - static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) 1094 - { 1095 - mutex_lock(&xrcd->tgt_qp_mutex); 1096 - list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); 1097 - mutex_unlock(&xrcd->tgt_qp_mutex); 1098 - } 1099 - 1100 1093 static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, 1101 1094 void (*event_handler)(struct ib_event *, void *), 1102 1095 void *qp_context) ··· 1132 1139 if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) 1133 1140 return ERR_PTR(-EINVAL); 1134 1141 1135 - qp = ERR_PTR(-EINVAL); 1136 - mutex_lock(&xrcd->tgt_qp_mutex); 1137 - list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { 1138 - if (real_qp->qp_num == qp_open_attr->qp_num) { 1139 - qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, 1140 - qp_open_attr->qp_context); 1141 - break; 1142 - } 1142 + down_read(&xrcd->tgt_qps_rwsem); 1143 + real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num); 1144 + if (!real_qp) { 1145 + up_read(&xrcd->tgt_qps_rwsem); 1146 + return ERR_PTR(-EINVAL); 1143 1147 } 1144 - mutex_unlock(&xrcd->tgt_qp_mutex); 1148 + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, 1149 + qp_open_attr->qp_context); 1150 + up_read(&xrcd->tgt_qps_rwsem); 1145 1151 return qp; 1146 1152 } 1147 1153 EXPORT_SYMBOL(ib_open_qp); ··· 1149 1157 
struct ib_qp_init_attr *qp_init_attr) 1150 1158 { 1151 1159 struct ib_qp *real_qp = qp; 1160 + int err; 1152 1161 1153 1162 qp->event_handler = __ib_shared_qp_event_handler; 1154 1163 qp->qp_context = qp; ··· 1165 1172 if (IS_ERR(qp)) 1166 1173 return qp; 1167 1174 1168 - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); 1175 + err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, 1176 + real_qp, GFP_KERNEL)); 1177 + if (err) { 1178 + ib_close_qp(qp); 1179 + return ERR_PTR(err); 1180 + } 1169 1181 return qp; 1170 1182 } 1171 1183 ··· 1710 1712 if (!(rdma_protocol_ib(qp->device, 1711 1713 attr->alt_ah_attr.port_num) && 1712 1714 rdma_protocol_ib(qp->device, port))) { 1713 - ret = EINVAL; 1715 + ret = -EINVAL; 1714 1716 goto out; 1715 1717 } 1716 1718 } ··· 1885 1887 1886 1888 real_qp = qp->real_qp; 1887 1889 xrcd = real_qp->xrcd; 1888 - 1889 - mutex_lock(&xrcd->tgt_qp_mutex); 1890 + down_write(&xrcd->tgt_qps_rwsem); 1890 1891 ib_close_qp(qp); 1891 1892 if (atomic_read(&real_qp->usecnt) == 0) 1892 - list_del(&real_qp->xrcd_list); 1893 + xa_erase(&xrcd->tgt_qps, real_qp->qp_num); 1893 1894 else 1894 1895 real_qp = NULL; 1895 - mutex_unlock(&xrcd->tgt_qp_mutex); 1896 + up_write(&xrcd->tgt_qps_rwsem); 1896 1897 1897 1898 if (real_qp) { 1898 1899 ret = ib_destroy_qp(real_qp); 1899 1900 if (!ret) 1900 1901 atomic_dec(&xrcd->usecnt); 1901 - else 1902 - __ib_insert_xrcd_qp(xrcd, real_qp); 1903 1902 } 1904 1903 1905 1904 return 0; ··· 2072 2077 if (!pd->device->ops.advise_mr) 2073 2078 return -EOPNOTSUPP; 2074 2079 2080 + if (!num_sge) 2081 + return 0; 2082 + 2075 2083 return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, 2076 2084 NULL); 2077 2085 } ··· 2102 2104 EXPORT_SYMBOL(ib_dereg_mr_user); 2103 2105 2104 2106 /** 2105 - * ib_alloc_mr_user() - Allocates a memory region 2107 + * ib_alloc_mr() - Allocates a memory region 2106 2108 * @pd: protection domain associated with the region 2107 2109 * @mr_type: memory region type 2108 2110 * 
@max_num_sg: maximum sg entries available for registration. 2109 - * @udata: user data or null for kernel objects 2110 2111 * 2111 2112 * Notes: 2112 2113 * Memory registeration page/sg lists must not exceed max_num_sg. ··· 2113 2116 * max_num_sg * used_page_size. 2114 2117 * 2115 2118 */ 2116 - struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, 2117 - u32 max_num_sg, struct ib_udata *udata) 2119 + struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2120 + u32 max_num_sg) 2118 2121 { 2119 2122 struct ib_mr *mr; 2120 2123 ··· 2129 2132 goto out; 2130 2133 } 2131 2134 2132 - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); 2133 - if (!IS_ERR(mr)) { 2134 - mr->device = pd->device; 2135 - mr->pd = pd; 2136 - mr->dm = NULL; 2137 - mr->uobject = NULL; 2138 - atomic_inc(&pd->usecnt); 2139 - mr->need_inval = false; 2140 - mr->res.type = RDMA_RESTRACK_MR; 2141 - rdma_restrack_kadd(&mr->res); 2142 - mr->type = mr_type; 2143 - mr->sig_attrs = NULL; 2144 - } 2135 + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); 2136 + if (IS_ERR(mr)) 2137 + goto out; 2138 + 2139 + mr->device = pd->device; 2140 + mr->pd = pd; 2141 + mr->dm = NULL; 2142 + mr->uobject = NULL; 2143 + atomic_inc(&pd->usecnt); 2144 + mr->need_inval = false; 2145 + mr->res.type = RDMA_RESTRACK_MR; 2146 + rdma_restrack_kadd(&mr->res); 2147 + mr->type = mr_type; 2148 + mr->sig_attrs = NULL; 2145 2149 2146 2150 out: 2147 2151 trace_mr_alloc(pd, mr_type, max_num_sg, mr); 2148 2152 return mr; 2149 2153 } 2150 - EXPORT_SYMBOL(ib_alloc_mr_user); 2154 + EXPORT_SYMBOL(ib_alloc_mr); 2151 2155 2152 2156 /** 2153 2157 * ib_alloc_mr_integrity() - Allocates an integrity memory region ··· 2286 2288 } 2287 2289 EXPORT_SYMBOL(ib_detach_mcast); 2288 2290 2289 - struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) 2291 + /** 2292 + * ib_alloc_xrcd_user - Allocates an XRC domain. 2293 + * @device: The device on which to allocate the XRC domain. 
2294 + * @inode: inode to connect XRCD 2295 + * @udata: Valid user data or NULL for kernel object 2296 + */ 2297 + struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, 2298 + struct inode *inode, struct ib_udata *udata) 2290 2299 { 2291 2300 struct ib_xrcd *xrcd; 2301 + int ret; 2292 2302 2293 2303 if (!device->ops.alloc_xrcd) 2294 2304 return ERR_PTR(-EOPNOTSUPP); 2295 2305 2296 - xrcd = device->ops.alloc_xrcd(device, NULL); 2297 - if (!IS_ERR(xrcd)) { 2298 - xrcd->device = device; 2299 - xrcd->inode = NULL; 2300 - atomic_set(&xrcd->usecnt, 0); 2301 - mutex_init(&xrcd->tgt_qp_mutex); 2302 - INIT_LIST_HEAD(&xrcd->tgt_qp_list); 2303 - } 2306 + xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); 2307 + if (!xrcd) 2308 + return ERR_PTR(-ENOMEM); 2304 2309 2310 + xrcd->device = device; 2311 + xrcd->inode = inode; 2312 + atomic_set(&xrcd->usecnt, 0); 2313 + init_rwsem(&xrcd->tgt_qps_rwsem); 2314 + xa_init(&xrcd->tgt_qps); 2315 + 2316 + ret = device->ops.alloc_xrcd(xrcd, udata); 2317 + if (ret) 2318 + goto err; 2305 2319 return xrcd; 2320 + err: 2321 + kfree(xrcd); 2322 + return ERR_PTR(ret); 2306 2323 } 2307 - EXPORT_SYMBOL(__ib_alloc_xrcd); 2324 + EXPORT_SYMBOL(ib_alloc_xrcd_user); 2308 2325 2309 - int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 2326 + /** 2327 + * ib_dealloc_xrcd_user - Deallocates an XRC domain. 2328 + * @xrcd: The XRC domain to deallocate. 
2329 + * @udata: Valid user data or NULL for kernel object 2330 + */ 2331 + int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) 2310 2332 { 2311 - struct ib_qp *qp; 2312 - int ret; 2313 - 2314 2333 if (atomic_read(&xrcd->usecnt)) 2315 2334 return -EBUSY; 2316 2335 2317 - while (!list_empty(&xrcd->tgt_qp_list)) { 2318 - qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); 2319 - ret = ib_destroy_qp(qp); 2320 - if (ret) 2321 - return ret; 2322 - } 2323 - mutex_destroy(&xrcd->tgt_qp_mutex); 2324 - 2325 - return xrcd->device->ops.dealloc_xrcd(xrcd, udata); 2336 + WARN_ON(!xa_empty(&xrcd->tgt_qps)); 2337 + xrcd->device->ops.dealloc_xrcd(xrcd, udata); 2338 + kfree(xrcd); 2339 + return 0; 2326 2340 } 2327 - EXPORT_SYMBOL(ib_dealloc_xrcd); 2341 + EXPORT_SYMBOL(ib_dealloc_xrcd_user); 2328 2342 2329 2343 /** 2330 2344 * ib_create_wq - Creates a WQ associated with the specified protection ··· 2418 2408 return err; 2419 2409 } 2420 2410 EXPORT_SYMBOL(ib_modify_wq); 2421 - 2422 - /* 2423 - * ib_create_rwq_ind_table - Creates a RQ Indirection Table. 2424 - * @device: The device on which to create the rwq indirection table. 2425 - * @ib_rwq_ind_table_init_attr: A list of initial attributes required to 2426 - * create the Indirection Table. 2427 - * 2428 - * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less 2429 - * than the created ib_rwq_ind_table object and the caller is responsible 2430 - * for its memory allocation/free. 
2431 - */ 2432 - struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, 2433 - struct ib_rwq_ind_table_init_attr *init_attr) 2434 - { 2435 - struct ib_rwq_ind_table *rwq_ind_table; 2436 - int i; 2437 - u32 table_size; 2438 - 2439 - if (!device->ops.create_rwq_ind_table) 2440 - return ERR_PTR(-EOPNOTSUPP); 2441 - 2442 - table_size = (1 << init_attr->log_ind_tbl_size); 2443 - rwq_ind_table = device->ops.create_rwq_ind_table(device, 2444 - init_attr, NULL); 2445 - if (IS_ERR(rwq_ind_table)) 2446 - return rwq_ind_table; 2447 - 2448 - rwq_ind_table->ind_tbl = init_attr->ind_tbl; 2449 - rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; 2450 - rwq_ind_table->device = device; 2451 - rwq_ind_table->uobject = NULL; 2452 - atomic_set(&rwq_ind_table->usecnt, 0); 2453 - 2454 - for (i = 0; i < table_size; i++) 2455 - atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); 2456 - 2457 - return rwq_ind_table; 2458 - } 2459 - EXPORT_SYMBOL(ib_create_rwq_ind_table); 2460 2411 2461 2412 /* 2462 2413 * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
+128 -42
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 842 842 } 843 843 } 844 844 845 + static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, 846 + int rsge, int max) 847 + { 848 + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 849 + rsge = max; 850 + return bnxt_re_get_rwqe_size(rsge); 851 + } 852 + 853 + static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) 854 + { 855 + u16 wqe_size, calc_ils; 856 + 857 + wqe_size = bnxt_re_get_swqe_size(nsge); 858 + if (ilsize) { 859 + calc_ils = sizeof(struct sq_send_hdr) + ilsize; 860 + wqe_size = max_t(u16, calc_ils, wqe_size); 861 + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); 862 + } 863 + return wqe_size; 864 + } 865 + 866 + static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, 867 + struct ib_qp_init_attr *init_attr) 868 + { 869 + struct bnxt_qplib_dev_attr *dev_attr; 870 + struct bnxt_qplib_qp *qplqp; 871 + struct bnxt_re_dev *rdev; 872 + struct bnxt_qplib_q *sq; 873 + int align, ilsize; 874 + 875 + rdev = qp->rdev; 876 + qplqp = &qp->qplib_qp; 877 + sq = &qplqp->sq; 878 + dev_attr = &rdev->dev_attr; 879 + 880 + align = sizeof(struct sq_send_hdr); 881 + ilsize = ALIGN(init_attr->cap.max_inline_data, align); 882 + 883 + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); 884 + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) 885 + return -EINVAL; 886 + /* For gen p4 and gen p5 backward compatibility mode 887 + * wqe size is fixed to 128 bytes 888 + */ 889 + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && 890 + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 891 + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); 892 + 893 + if (init_attr->cap.max_inline_data) { 894 + qplqp->max_inline_data = sq->wqe_size - 895 + sizeof(struct sq_send_hdr); 896 + init_attr->cap.max_inline_data = qplqp->max_inline_data; 897 + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 898 + sq->max_sge = qplqp->max_inline_data / 899 + sizeof(struct sq_sge); 900 + } 901 + 902 + return 0; 903 + } 904 + 845 905 static int 
bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, 846 906 struct bnxt_re_qp *qp, struct ib_udata *udata) 847 907 { 908 + struct bnxt_qplib_qp *qplib_qp; 909 + struct bnxt_re_ucontext *cntx; 848 910 struct bnxt_re_qp_req ureq; 849 - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; 850 - struct ib_umem *umem; 851 911 int bytes = 0, psn_sz; 852 - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( 853 - udata, struct bnxt_re_ucontext, ib_uctx); 912 + struct ib_umem *umem; 913 + int psn_nume; 854 914 915 + qplib_qp = &qp->qplib_qp; 916 + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, 917 + ib_uctx); 855 918 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) 856 919 return -EFAULT; 857 920 ··· 922 859 /* Consider mapping PSN search memory only for RC QPs. */ 923 860 if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { 924 861 psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? 925 - sizeof(struct sq_psn_search_ext) : 926 - sizeof(struct sq_psn_search); 927 - bytes += (qplib_qp->sq.max_wqe * psn_sz); 862 + sizeof(struct sq_psn_search_ext) : 863 + sizeof(struct sq_psn_search); 864 + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 
865 + qplib_qp->sq.max_wqe : 866 + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / 867 + sizeof(struct bnxt_qplib_sge)); 868 + bytes += (psn_nume * psn_sz); 928 869 } 870 + 929 871 bytes = PAGE_ALIGN(bytes); 930 872 umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, 931 873 IB_ACCESS_LOCAL_WRITE); ··· 1043 975 qp->qplib_qp.sig_type = true; 1044 976 1045 977 /* Shadow QP SQ depth should be same as QP1 RQ depth */ 1046 - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); 978 + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); 1047 979 qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; 1048 980 qp->qplib_qp.sq.max_sge = 2; 1049 981 /* Q full delta can be 1 since it is internal QP */ ··· 1054 986 qp->qplib_qp.scq = qp1_qp->scq; 1055 987 qp->qplib_qp.rcq = qp1_qp->rcq; 1056 988 1057 - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); 989 + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); 1058 990 qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; 1059 991 qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; 1060 992 /* Q full delta can be 1 since it is internal QP */ ··· 1109 1041 qplqp->srq = &srq->qplib_srq; 1110 1042 rq->max_wqe = 0; 1111 1043 } else { 1112 - rq->wqe_size = bnxt_re_get_rwqe_size(); 1044 + rq->max_sge = init_attr->cap.max_recv_sge; 1045 + if (rq->max_sge > dev_attr->max_qp_sges) 1046 + rq->max_sge = dev_attr->max_qp_sges; 1047 + init_attr->cap.max_recv_sge = rq->max_sge; 1048 + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, 1049 + dev_attr->max_qp_sges); 1113 1050 /* Allocate 1 more than what's provided so posting max doesn't 1114 1051 * mean empty. 
1115 1052 */ 1116 1053 entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); 1117 1054 rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); 1118 - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; 1119 - rq->max_sge = init_attr->cap.max_recv_sge; 1120 - if (rq->max_sge > dev_attr->max_qp_sges) 1121 - rq->max_sge = dev_attr->max_qp_sges; 1055 + rq->q_full_delta = 0; 1056 + rq->sg_info.pgsize = PAGE_SIZE; 1057 + rq->sg_info.pgshft = PAGE_SHIFT; 1122 1058 } 1123 - rq->sg_info.pgsize = PAGE_SIZE; 1124 - rq->sg_info.pgshft = PAGE_SHIFT; 1125 1059 1126 1060 return 0; 1127 1061 } ··· 1138 1068 qplqp = &qp->qplib_qp; 1139 1069 dev_attr = &rdev->dev_attr; 1140 1070 1141 - qplqp->rq.max_sge = dev_attr->max_qp_sges; 1142 - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) 1071 + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { 1143 1072 qplqp->rq.max_sge = dev_attr->max_qp_sges; 1144 - qplqp->rq.max_sge = 6; 1073 + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) 1074 + qplqp->rq.max_sge = dev_attr->max_qp_sges; 1075 + qplqp->rq.max_sge = 6; 1076 + } 1145 1077 } 1146 1078 1147 - static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 1148 - struct ib_qp_init_attr *init_attr, 1149 - struct ib_udata *udata) 1079 + static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 1080 + struct ib_qp_init_attr *init_attr, 1081 + struct ib_udata *udata) 1150 1082 { 1151 1083 struct bnxt_qplib_dev_attr *dev_attr; 1152 1084 struct bnxt_qplib_qp *qplqp; 1153 1085 struct bnxt_re_dev *rdev; 1154 1086 struct bnxt_qplib_q *sq; 1155 1087 int entries; 1088 + int diff; 1089 + int rc; 1156 1090 1157 1091 rdev = qp->rdev; 1158 1092 qplqp = &qp->qplib_qp; 1159 1093 sq = &qplqp->sq; 1160 1094 dev_attr = &rdev->dev_attr; 1161 1095 1162 - sq->wqe_size = bnxt_re_get_swqe_size(); 1163 1096 sq->max_sge = init_attr->cap.max_send_sge; 1164 - if (sq->max_sge > dev_attr->max_qp_sges) 1097 + if (sq->max_sge > dev_attr->max_qp_sges) { 1165 1098 sq->max_sge = dev_attr->max_qp_sges; 1166 - /* 
1167 - * Change the SQ depth if user has requested minimum using 1168 - * configfs. Only supported for kernel consumers 1169 - */ 1099 + init_attr->cap.max_send_sge = sq->max_sge; 1100 + } 1101 + 1102 + rc = bnxt_re_setup_swqe_size(qp, init_attr); 1103 + if (rc) 1104 + return rc; 1105 + 1170 1106 entries = init_attr->cap.max_send_wr; 1171 1107 /* Allocate 128 + 1 more than what's provided */ 1172 - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); 1173 - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1174 - BNXT_QPLIB_RESERVED_QP_WRS + 1); 1175 - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; 1108 + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? 1109 + 0 : BNXT_QPLIB_RESERVED_QP_WRS; 1110 + entries = roundup_pow_of_two(entries + diff + 1); 1111 + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); 1112 + sq->q_full_delta = diff + 1; 1176 1113 /* 1177 1114 * Reserving one slot for Phantom WQE. Application can 1178 1115 * post one extra entry in this case. 
But allowing this to avoid ··· 1188 1111 qplqp->sq.q_full_delta -= 1; 1189 1112 qplqp->sq.sg_info.pgsize = PAGE_SIZE; 1190 1113 qplqp->sq.sg_info.pgshft = PAGE_SHIFT; 1114 + 1115 + return 0; 1191 1116 } 1192 1117 1193 1118 static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, ··· 1204 1125 qplqp = &qp->qplib_qp; 1205 1126 dev_attr = &rdev->dev_attr; 1206 1127 1207 - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); 1208 - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); 1209 - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - 1210 - init_attr->cap.max_send_wr; 1211 - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ 1212 - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) 1213 - qplqp->sq.max_sge = dev_attr->max_qp_sges; 1128 + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { 1129 + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); 1130 + qplqp->sq.max_wqe = min_t(u32, entries, 1131 + dev_attr->max_qp_wqes + 1); 1132 + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - 1133 + init_attr->cap.max_send_wr; 1134 + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ 1135 + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) 1136 + qplqp->sq.max_sge = dev_attr->max_qp_sges; 1137 + } 1214 1138 } 1215 1139 1216 1140 static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, ··· 1265 1183 goto out; 1266 1184 } 1267 1185 qplqp->type = (u8)qptype; 1186 + qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode; 1268 1187 1269 1188 if (init_attr->qp_type == IB_QPT_RC) { 1270 1189 qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; ··· 1309 1226 bnxt_re_adjust_gsi_rq_attr(qp); 1310 1227 1311 1228 /* Setup SQ */ 1312 - bnxt_re_init_sq_attr(qp, init_attr, udata); 1229 + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); 1230 + if (rc) 1231 + goto out; 1313 1232 if (init_attr->qp_type == IB_QPT_GSI) 1314 1233 bnxt_re_adjust_gsi_sq_attr(qp, init_attr); 1315 1234 ··· 1659 1574 entries = dev_attr->max_srq_wqes + 1; 1660 1575 
srq->qplib_srq.max_wqe = entries; 1661 1576 1662 - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); 1663 1577 srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; 1578 + srq->qplib_srq.wqe_size = 1579 + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); 1664 1580 srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; 1665 1581 srq->srq_limit = srq_init_attr->attr.srq_limit; 1666 1582 srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; ··· 3655 3569 } 3656 3570 3657 3571 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, 3658 - u32 max_num_sg, struct ib_udata *udata) 3572 + u32 max_num_sg) 3659 3573 { 3660 3574 struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); 3661 3575 struct bnxt_re_dev *rdev = pd->rdev;
+5 -5
drivers/infiniband/hw/bnxt_re/ib_verbs.h
··· 136 136 spinlock_t sh_lock; /* protect shpg */ 137 137 }; 138 138 139 - static inline u16 bnxt_re_get_swqe_size(void) 139 + static inline u16 bnxt_re_get_swqe_size(int nsge) 140 140 { 141 - return sizeof(struct sq_send); 141 + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); 142 142 } 143 143 144 - static inline u16 bnxt_re_get_rwqe_size(void) 144 + static inline u16 bnxt_re_get_rwqe_size(int nsge) 145 145 { 146 - return sizeof(struct rq_wqe); 146 + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); 147 147 } 148 148 149 149 int bnxt_re_query_device(struct ib_device *ibdev, ··· 201 201 int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, 202 202 unsigned int *sg_offset); 203 203 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, 204 - u32 max_num_sg, struct ib_udata *udata); 204 + u32 max_num_sg); 205 205 int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); 206 206 struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, 207 207 struct ib_udata *udata);
+17 -6
drivers/infiniband/hw/bnxt_re/main.c
··· 82 82 static void bnxt_re_dealloc_driver(struct ib_device *ib_dev); 83 83 static void bnxt_re_stop_irq(void *handle); 84 84 85 + static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) 86 + { 87 + struct bnxt_qplib_chip_ctx *cctx; 88 + 89 + cctx = rdev->chip_ctx; 90 + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? 91 + mode : BNXT_QPLIB_WQE_MODE_STATIC; 92 + } 93 + 85 94 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) 86 95 { 87 96 struct bnxt_qplib_chip_ctx *chip_ctx; ··· 106 97 kfree(chip_ctx); 107 98 } 108 99 109 - static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) 100 + static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) 110 101 { 111 102 struct bnxt_qplib_chip_ctx *chip_ctx; 112 103 struct bnxt_en_dev *en_dev; ··· 126 117 rdev->qplib_res.cctx = rdev->chip_ctx; 127 118 rdev->rcfw.res = &rdev->qplib_res; 128 119 120 + bnxt_re_set_drv_mode(rdev, wqe_mode); 129 121 return 0; 130 122 } 131 123 ··· 1396 1386 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); 1397 1387 } 1398 1388 1399 - static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) 1389 + static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) 1400 1390 { 1401 1391 struct bnxt_qplib_creq_ctx *creq; 1402 1392 struct bnxt_re_ring_attr rattr; ··· 1416 1406 } 1417 1407 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1418 1408 1419 - rc = bnxt_re_setup_chip_ctx(rdev); 1409 + rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); 1420 1410 if (rc) { 1421 1411 ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); 1422 1412 return -EINVAL; ··· 1595 1585 } 1596 1586 1597 1587 static int bnxt_re_add_device(struct bnxt_re_dev **rdev, 1598 - struct net_device *netdev) 1588 + struct net_device *netdev, u8 wqe_mode) 1599 1589 { 1600 1590 int rc; 1601 1591 ··· 1609 1599 } 1610 1600 1611 1601 pci_dev_get((*rdev)->en_dev->pdev); 1612 - rc = bnxt_re_dev_init(*rdev); 1602 + rc = bnxt_re_dev_init(*rdev, wqe_mode); 1613 1603 
if (rc) { 1614 1604 pci_dev_put((*rdev)->en_dev->pdev); 1615 1605 bnxt_re_dev_unreg(*rdev); ··· 1721 1711 case NETDEV_REGISTER: 1722 1712 if (rdev) 1723 1713 break; 1724 - rc = bnxt_re_add_device(&rdev, real_dev); 1714 + rc = bnxt_re_add_device(&rdev, real_dev, 1715 + BNXT_QPLIB_WQE_MODE_STATIC); 1725 1716 if (!rc) 1726 1717 sch_work = true; 1727 1718 release = false;
+424 -325
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 178 178 179 179 if (qp->rq_hdr_buf) 180 180 dma_free_coherent(&res->pdev->dev, 181 - rq->hwq.max_elements * qp->rq_hdr_buf_size, 181 + rq->max_wqe * qp->rq_hdr_buf_size, 182 182 qp->rq_hdr_buf, qp->rq_hdr_buf_map); 183 183 if (qp->sq_hdr_buf) 184 184 dma_free_coherent(&res->pdev->dev, 185 - sq->hwq.max_elements * qp->sq_hdr_buf_size, 185 + sq->max_wqe * qp->sq_hdr_buf_size, 186 186 qp->sq_hdr_buf, qp->sq_hdr_buf_map); 187 187 qp->rq_hdr_buf = NULL; 188 188 qp->sq_hdr_buf = NULL; ··· 199 199 struct bnxt_qplib_q *sq = &qp->sq; 200 200 int rc = 0; 201 201 202 - if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { 202 + if (qp->sq_hdr_buf_size && sq->max_wqe) { 203 203 qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, 204 - sq->hwq.max_elements * 205 - qp->sq_hdr_buf_size, 204 + sq->max_wqe * qp->sq_hdr_buf_size, 206 205 &qp->sq_hdr_buf_map, GFP_KERNEL); 207 206 if (!qp->sq_hdr_buf) { 208 207 rc = -ENOMEM; ··· 211 212 } 212 213 } 213 214 214 - if (qp->rq_hdr_buf_size && rq->hwq.max_elements) { 215 + if (qp->rq_hdr_buf_size && rq->max_wqe) { 215 216 qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, 216 - rq->hwq.max_elements * 217 + rq->max_wqe * 217 218 qp->rq_hdr_buf_size, 218 219 &qp->rq_hdr_buf_map, 219 220 GFP_KERNEL); ··· 660 661 srq->dbinfo.hwq = &srq->hwq; 661 662 srq->dbinfo.xid = srq->id; 662 663 srq->dbinfo.db = srq->dpi->dbr; 664 + srq->dbinfo.max_slot = 1; 663 665 srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; 664 666 if (srq->threshold) 665 667 bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); ··· 784 784 } 785 785 786 786 /* QP */ 787 + 788 + static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) 789 + { 790 + int rc = 0; 791 + int indx; 792 + 793 + que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); 794 + if (!que->swq) { 795 + rc = -ENOMEM; 796 + goto out; 797 + } 798 + 799 + que->swq_start = 0; 800 + que->swq_last = que->max_wqe - 1; 801 + for (indx = 0; indx < que->max_wqe; indx++) 802 + 
que->swq[indx].next_idx = indx + 1; 803 + que->swq[que->swq_last].next_idx = 0; /* Make it circular */ 804 + que->swq_last = 0; 805 + out: 806 + return rc; 807 + } 808 + 787 809 int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) 788 810 { 789 811 struct bnxt_qplib_hwq_attr hwq_attr = {}; ··· 830 808 /* SQ */ 831 809 hwq_attr.res = res; 832 810 hwq_attr.sginfo = &sq->sg_info; 833 - hwq_attr.depth = sq->max_wqe; 834 - hwq_attr.stride = sq->wqe_size; 811 + hwq_attr.stride = sizeof(struct sq_sge); 812 + hwq_attr.depth = bnxt_qplib_get_depth(sq); 835 813 hwq_attr.type = HWQ_TYPE_QUEUE; 836 814 rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); 837 815 if (rc) 838 816 goto exit; 839 817 840 - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); 841 - if (!sq->swq) { 842 - rc = -ENOMEM; 818 + rc = bnxt_qplib_alloc_init_swq(sq); 819 + if (rc) 843 820 goto fail_sq; 844 - } 821 + 822 + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); 845 823 pbl = &sq->hwq.pbl[PBL_LVL_0]; 846 824 req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 847 825 pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << 848 826 CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); 849 827 pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); 850 828 req.sq_pg_size_sq_lvl = pg_sz_lvl; 829 + req.sq_fwo_sq_sge = 830 + cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << 831 + CMDQ_CREATE_QP1_SQ_SGE_SFT); 832 + req.scq_cid = cpu_to_le32(qp->scq->id); 851 833 852 - if (qp->scq) 853 - req.scq_cid = cpu_to_le32(qp->scq->id); 854 834 /* RQ */ 855 835 if (rq->max_wqe) { 856 836 hwq_attr.res = res; 857 837 hwq_attr.sginfo = &rq->sg_info; 858 - hwq_attr.stride = rq->wqe_size; 859 - hwq_attr.depth = qp->rq.max_wqe; 838 + hwq_attr.stride = sizeof(struct sq_sge); 839 + hwq_attr.depth = bnxt_qplib_get_depth(rq); 860 840 hwq_attr.type = HWQ_TYPE_QUEUE; 861 841 rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); 862 842 if (rc) 863 - goto fail_sq; 864 - 865 - rq->swq = 
kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), 866 - GFP_KERNEL); 867 - if (!rq->swq) { 868 - rc = -ENOMEM; 843 + goto sq_swq; 844 + rc = bnxt_qplib_alloc_init_swq(rq); 845 + if (rc) 869 846 goto fail_rq; 870 - } 847 + req.rq_size = cpu_to_le32(rq->max_wqe); 871 848 pbl = &rq->hwq.pbl[PBL_LVL_0]; 872 849 req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 873 850 pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << 874 851 CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); 875 852 pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); 876 853 req.rq_pg_size_rq_lvl = pg_sz_lvl; 877 - if (qp->rcq) 878 - req.rcq_cid = cpu_to_le32(qp->rcq->id); 854 + req.rq_fwo_rq_sge = 855 + cpu_to_le16((rq->max_sge & 856 + CMDQ_CREATE_QP1_RQ_SGE_MASK) << 857 + CMDQ_CREATE_QP1_RQ_SGE_SFT); 879 858 } 859 + req.rcq_cid = cpu_to_le32(qp->rcq->id); 880 860 /* Header buffer - allow hdr_buf pass in */ 881 861 rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); 882 862 if (rc) { 883 863 rc = -ENOMEM; 884 - goto fail; 864 + goto rq_rwq; 885 865 } 886 866 qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; 887 867 req.qp_flags = cpu_to_le32(qp_flags); 888 - req.sq_size = cpu_to_le32(sq->hwq.max_elements); 889 - req.rq_size = cpu_to_le32(rq->hwq.max_elements); 890 - 891 - req.sq_fwo_sq_sge = 892 - cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << 893 - CMDQ_CREATE_QP1_SQ_SGE_SFT); 894 - req.rq_fwo_rq_sge = 895 - cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) << 896 - CMDQ_CREATE_QP1_RQ_SGE_SFT); 897 - 898 868 req.pd_id = cpu_to_le32(qp->pd->id); 899 869 900 870 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, ··· 900 886 sq->dbinfo.hwq = &sq->hwq; 901 887 sq->dbinfo.xid = qp->id; 902 888 sq->dbinfo.db = qp->dpi->dbr; 889 + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); 903 890 if (rq->max_wqe) { 904 891 rq->dbinfo.hwq = &rq->hwq; 905 892 rq->dbinfo.xid = qp->id; 906 893 rq->dbinfo.db = qp->dpi->dbr; 894 + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); 907 895 } 
908 896 rcfw->qp_tbl[qp->id].qp_id = qp->id; 909 897 rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; ··· 914 898 915 899 fail: 916 900 bnxt_qplib_free_qp_hdr_buf(res, qp); 901 + rq_rwq: 902 + kfree(rq->swq); 917 903 fail_rq: 918 904 bnxt_qplib_free_hwq(res, &rq->hwq); 919 - kfree(rq->swq); 905 + sq_swq: 906 + kfree(sq->swq); 920 907 fail_sq: 921 908 bnxt_qplib_free_hwq(res, &sq->hwq); 922 - kfree(sq->swq); 923 909 exit: 924 910 return rc; 925 911 } ··· 930 912 { 931 913 struct bnxt_qplib_hwq *hwq; 932 914 struct bnxt_qplib_q *sq; 933 - u64 fpsne, psne, psn_pg; 934 - u16 indx_pad = 0, indx; 935 - u16 pg_num, pg_indx; 936 - u64 *page; 915 + u64 fpsne, psn_pg; 916 + u16 indx_pad = 0; 937 917 938 918 sq = &qp->sq; 939 919 hwq = &sq->hwq; 940 - 941 - fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); 920 + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); 942 921 if (!IS_ALIGNED(fpsne, PAGE_SIZE)) 943 922 indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; 944 923 945 - page = (u64 *)psn_pg; 946 - for (indx = 0; indx < hwq->max_elements; indx++) { 947 - pg_num = (indx + indx_pad) / (PAGE_SIZE / size); 948 - pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); 949 - psne = page[pg_num] + pg_indx * size; 950 - sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; 951 - sq->swq[indx].psn_search = (struct sq_psn_search *)psne; 952 - } 924 + hwq->pad_pgofft = indx_pad; 925 + hwq->pad_pg = (u64 *)psn_pg; 926 + hwq->pad_stride = size; 953 927 } 954 928 955 929 int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) ··· 954 944 struct creq_create_qp_resp resp; 955 945 int rc, req_size, psn_sz = 0; 956 946 struct bnxt_qplib_hwq *xrrq; 957 - u16 cmd_flags = 0, max_ssge; 958 947 struct bnxt_qplib_pbl *pbl; 959 948 struct cmdq_create_qp req; 949 + u16 cmd_flags = 0; 960 950 u32 qp_flags = 0; 961 951 u8 pg_sz_lvl; 962 - u16 max_rsge; 952 + u16 nsge; 963 953 964 954 RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); 965 955 ··· 977 967 978 968 hwq_attr.res = 
res; 979 969 hwq_attr.sginfo = &sq->sg_info; 980 - hwq_attr.stride = sq->wqe_size; 981 - hwq_attr.depth = sq->max_wqe; 970 + hwq_attr.stride = sizeof(struct sq_sge); 971 + hwq_attr.depth = bnxt_qplib_get_depth(sq); 982 972 hwq_attr.aux_stride = psn_sz; 983 - hwq_attr.aux_depth = hwq_attr.depth; 973 + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); 984 974 hwq_attr.type = HWQ_TYPE_QUEUE; 985 975 rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); 986 976 if (rc) 987 977 goto exit; 988 978 989 - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); 990 - if (!sq->swq) { 991 - rc = -ENOMEM; 979 + rc = bnxt_qplib_alloc_init_swq(sq); 980 + if (rc) 992 981 goto fail_sq; 993 - } 994 982 995 983 if (psn_sz) 996 984 bnxt_qplib_init_psn_ptr(qp, psn_sz); 997 985 986 + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); 998 987 pbl = &sq->hwq.pbl[PBL_LVL_0]; 999 988 req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 1000 989 pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << 1001 990 CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); 1002 991 pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); 1003 992 req.sq_pg_size_sq_lvl = pg_sz_lvl; 1004 - 1005 - if (qp->scq) 1006 - req.scq_cid = cpu_to_le32(qp->scq->id); 993 + req.sq_fwo_sq_sge = 994 + cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) << 995 + CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); 996 + req.scq_cid = cpu_to_le32(qp->scq->id); 1007 997 1008 998 /* RQ */ 1009 - if (rq->max_wqe) { 999 + if (!qp->srq) { 1010 1000 hwq_attr.res = res; 1011 1001 hwq_attr.sginfo = &rq->sg_info; 1012 - hwq_attr.stride = rq->wqe_size; 1013 - hwq_attr.depth = rq->max_wqe; 1002 + hwq_attr.stride = sizeof(struct sq_sge); 1003 + hwq_attr.depth = bnxt_qplib_get_depth(rq); 1014 1004 hwq_attr.aux_stride = 0; 1015 1005 hwq_attr.aux_depth = 0; 1016 1006 hwq_attr.type = HWQ_TYPE_QUEUE; 1017 1007 rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); 1018 1008 if (rc) 1019 - goto fail_sq; 1020 - 1021 - rq->swq = 
kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), 1022 - GFP_KERNEL); 1023 - if (!rq->swq) { 1024 - rc = -ENOMEM; 1009 + goto sq_swq; 1010 + rc = bnxt_qplib_alloc_init_swq(rq); 1011 + if (rc) 1025 1012 goto fail_rq; 1026 - } 1013 + 1014 + req.rq_size = cpu_to_le32(rq->max_wqe); 1027 1015 pbl = &rq->hwq.pbl[PBL_LVL_0]; 1028 1016 req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 1029 1017 pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << 1030 1018 CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); 1031 1019 pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); 1032 1020 req.rq_pg_size_rq_lvl = pg_sz_lvl; 1021 + nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 1022 + 6 : rq->max_sge; 1023 + req.rq_fwo_rq_sge = 1024 + cpu_to_le16(((nsge & 1025 + CMDQ_CREATE_QP_RQ_SGE_MASK) << 1026 + CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); 1033 1027 } else { 1034 1028 /* SRQ */ 1035 - if (qp->srq) { 1036 - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; 1037 - req.srq_cid = cpu_to_le32(qp->srq->id); 1038 - } 1029 + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; 1030 + req.srq_cid = cpu_to_le32(qp->srq->id); 1039 1031 } 1040 - 1041 - if (qp->rcq) 1042 - req.rcq_cid = cpu_to_le32(qp->rcq->id); 1032 + req.rcq_cid = cpu_to_le32(qp->rcq->id); 1043 1033 1044 1034 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; 1045 1035 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; 1046 1036 if (qp->sig_type) 1047 1037 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; 1038 + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) 1039 + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; 1048 1040 req.qp_flags = cpu_to_le32(qp_flags); 1049 1041 1050 - req.sq_size = cpu_to_le32(sq->hwq.max_elements); 1051 - req.rq_size = cpu_to_le32(rq->hwq.max_elements); 1052 - qp->sq_hdr_buf = NULL; 1053 - qp->rq_hdr_buf = NULL; 1054 - 1055 - rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); 1056 - if (rc) 1057 - goto fail_rq; 1058 - 1059 - /* CTRL-22434: Irrespective of the requested SGE count on the SQ 1060 - * always create the 
QP with max send sges possible if the requested 1061 - * inline size is greater than 0. 1062 - */ 1063 - max_ssge = qp->max_inline_data ? 6 : sq->max_sge; 1064 - req.sq_fwo_sq_sge = cpu_to_le16( 1065 - ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) 1066 - << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); 1067 - max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge; 1068 - req.rq_fwo_rq_sge = cpu_to_le16( 1069 - ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK) 1070 - << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); 1071 1042 /* ORRQ and IRRQ */ 1072 1043 if (psn_sz) { 1073 1044 xrrq = &qp->orrq; ··· 1069 1078 hwq_attr.type = HWQ_TYPE_CTX; 1070 1079 rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); 1071 1080 if (rc) 1072 - goto fail_buf_free; 1081 + goto rq_swq; 1073 1082 pbl = &xrrq->pbl[PBL_LVL_0]; 1074 1083 req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]); 1075 1084 ··· 1104 1113 sq->dbinfo.hwq = &sq->hwq; 1105 1114 sq->dbinfo.xid = qp->id; 1106 1115 sq->dbinfo.db = qp->dpi->dbr; 1116 + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); 1107 1117 if (rq->max_wqe) { 1108 1118 rq->dbinfo.hwq = &rq->hwq; 1109 1119 rq->dbinfo.xid = qp->id; 1110 1120 rq->dbinfo.db = qp->dpi->dbr; 1121 + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); 1111 1122 } 1112 1123 rcfw->qp_tbl[qp->id].qp_id = qp->id; 1113 1124 rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; 1114 1125 1115 1126 return 0; 1116 - 1117 1127 fail: 1118 - if (qp->irrq.max_elements) 1119 - bnxt_qplib_free_hwq(res, &qp->irrq); 1128 + bnxt_qplib_free_hwq(res, &qp->irrq); 1120 1129 fail_orrq: 1121 - if (qp->orrq.max_elements) 1122 - bnxt_qplib_free_hwq(res, &qp->orrq); 1123 - fail_buf_free: 1124 - bnxt_qplib_free_qp_hdr_buf(res, qp); 1130 + bnxt_qplib_free_hwq(res, &qp->orrq); 1131 + rq_swq: 1132 + kfree(rq->swq); 1125 1133 fail_rq: 1126 1134 bnxt_qplib_free_hwq(res, &rq->hwq); 1127 - kfree(rq->swq); 1135 + sq_swq: 1136 + kfree(sq->swq); 1128 1137 fail_sq: 1129 1138 bnxt_qplib_free_hwq(res, &sq->hwq); 1130 - kfree(sq->swq); 
1131 1139 exit: 1132 1140 return rc; 1133 1141 } ··· 1502 1512 memset(sge, 0, sizeof(*sge)); 1503 1513 1504 1514 if (qp->sq_hdr_buf) { 1505 - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); 1515 + sw_prod = sq->swq_start; 1506 1516 sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map + 1507 1517 sw_prod * qp->sq_hdr_buf_size); 1508 1518 sge->lkey = 0xFFFFFFFF; ··· 1516 1526 { 1517 1527 struct bnxt_qplib_q *rq = &qp->rq; 1518 1528 1519 - return HWQ_CMP(rq->hwq.prod, &rq->hwq); 1529 + return rq->swq_start; 1520 1530 } 1521 1531 1522 1532 dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index) ··· 1533 1543 memset(sge, 0, sizeof(*sge)); 1534 1544 1535 1545 if (qp->rq_hdr_buf) { 1536 - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); 1546 + sw_prod = rq->swq_start; 1537 1547 sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map + 1538 1548 sw_prod * qp->rq_hdr_buf_size); 1539 1549 sge->lkey = 0xFFFFFFFF; ··· 1552 1562 u32 flg_npsn; 1553 1563 u32 op_spsn; 1554 1564 1565 + if (!swq->psn_search) 1566 + return; 1555 1567 psns = swq->psn_search; 1556 1568 psns_ext = swq->psn_ext; 1557 1569 ··· 1567 1575 if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { 1568 1576 psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); 1569 1577 psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); 1578 + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); 1570 1579 } else { 1571 1580 psns->opcode_start_psn = cpu_to_le32(op_spsn); 1572 1581 psns->flags_next_psn = cpu_to_le32(flg_npsn); 1573 1582 } 1583 + } 1584 + 1585 + static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, 1586 + struct bnxt_qplib_swqe *wqe, 1587 + u16 *idx) 1588 + { 1589 + struct bnxt_qplib_hwq *hwq; 1590 + int len, t_len, offt; 1591 + bool pull_dst = true; 1592 + void *il_dst = NULL; 1593 + void *il_src = NULL; 1594 + int t_cplen, cplen; 1595 + int indx; 1596 + 1597 + hwq = &qp->sq.hwq; 1598 + t_len = 0; 1599 + for (indx = 0; indx < wqe->num_sge; indx++) { 1600 + len = wqe->sg_list[indx].size; 1601 + il_src = (void 
*)wqe->sg_list[indx].addr; 1602 + t_len += len; 1603 + if (t_len > qp->max_inline_data) 1604 + goto bad; 1605 + while (len) { 1606 + if (pull_dst) { 1607 + pull_dst = false; 1608 + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); 1609 + (*idx)++; 1610 + t_cplen = 0; 1611 + offt = 0; 1612 + } 1613 + cplen = min_t(int, len, sizeof(struct sq_sge)); 1614 + cplen = min_t(int, cplen, 1615 + (sizeof(struct sq_sge) - offt)); 1616 + memcpy(il_dst, il_src, cplen); 1617 + t_cplen += cplen; 1618 + il_src += cplen; 1619 + il_dst += cplen; 1620 + offt += cplen; 1621 + len -= cplen; 1622 + if (t_cplen == sizeof(struct sq_sge)) 1623 + pull_dst = true; 1624 + } 1625 + } 1626 + 1627 + return t_len; 1628 + bad: 1629 + return -ENOMEM; 1630 + } 1631 + 1632 + static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, 1633 + struct bnxt_qplib_sge *ssge, 1634 + u16 nsge, u16 *idx) 1635 + { 1636 + struct sq_sge *dsge; 1637 + int indx, len = 0; 1638 + 1639 + for (indx = 0; indx < nsge; indx++, (*idx)++) { 1640 + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); 1641 + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); 1642 + dsge->l_key = cpu_to_le32(ssge[indx].lkey); 1643 + dsge->size = cpu_to_le32(ssge[indx].size); 1644 + len += ssge[indx].size; 1645 + } 1646 + 1647 + return len; 1648 + } 1649 + 1650 + static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, 1651 + struct bnxt_qplib_swqe *wqe, 1652 + u16 *wqe_sz, u16 *qdf, u8 mode) 1653 + { 1654 + u32 ilsize, bytes; 1655 + u16 nsge; 1656 + u16 slot; 1657 + 1658 + nsge = wqe->num_sge; 1659 + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. 
*/ 1660 + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); 1661 + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { 1662 + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); 1663 + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); 1664 + bytes += sizeof(struct sq_send_hdr); 1665 + } 1666 + 1667 + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); 1668 + slot = bytes >> 4; 1669 + *wqe_sz = slot; 1670 + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) 1671 + slot = 8; 1672 + return slot; 1673 + } 1674 + 1675 + static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, 1676 + struct bnxt_qplib_swq *swq) 1677 + { 1678 + struct bnxt_qplib_hwq *hwq; 1679 + u32 pg_num, pg_indx; 1680 + void *buff; 1681 + u32 tail; 1682 + 1683 + hwq = &sq->hwq; 1684 + if (!hwq->pad_pg) 1685 + return; 1686 + tail = swq->slot_idx / sq->dbinfo.max_slot; 1687 + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); 1688 + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); 1689 + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); 1690 + swq->psn_ext = buff; 1691 + swq->psn_search = buff; 1574 1692 } 1575 1693 1576 1694 void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) ··· 1696 1594 struct bnxt_qplib_nq_work *nq_work = NULL; 1697 1595 int i, rc = 0, data_len = 0, pkt_num = 0; 1698 1596 struct bnxt_qplib_q *sq = &qp->sq; 1699 - struct sq_send *hw_sq_send_hdr; 1597 + struct bnxt_qplib_hwq *hwq; 1700 1598 struct bnxt_qplib_swq *swq; 1701 1599 bool sch_handler = false; 1702 - struct sq_sge *hw_sge; 1703 - u8 wqe_size16; 1600 + u16 wqe_sz, qdf = 0; 1601 + void *base_hdr; 1602 + void *ext_hdr; 1704 1603 __le32 temp32; 1705 - u32 sw_prod; 1604 + u32 wqe_idx; 1605 + u32 slots; 1606 + u16 idx; 1706 1607 1707 - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { 1708 - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1709 - sch_handler = true; 1710 - dev_dbg(&sq->hwq.pdev->dev, 1711 - "%s Error QP. 
Scheduling for poll_cq\n", 1712 - __func__); 1713 - goto queue_err; 1714 - } 1608 + hwq = &sq->hwq; 1609 + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && 1610 + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1611 + dev_err(&hwq->pdev->dev, 1612 + "QPLIB: FP: QP (0x%x) is in the 0x%x state", 1613 + qp->id, qp->state); 1614 + rc = -EINVAL; 1615 + goto done; 1715 1616 } 1716 1617 1717 - if (bnxt_qplib_queue_full(sq)) { 1718 - dev_err(&sq->hwq.pdev->dev, 1618 + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); 1619 + if (bnxt_qplib_queue_full(sq, slots + qdf)) { 1620 + dev_err(&hwq->pdev->dev, 1719 1621 "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", 1720 - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, 1721 - sq->q_full_delta); 1622 + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); 1722 1623 rc = -ENOMEM; 1723 1624 goto done; 1724 1625 } 1725 - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); 1726 - swq = &sq->swq[sw_prod]; 1626 + 1627 + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); 1628 + bnxt_qplib_pull_psn_buff(sq, swq); 1629 + 1630 + idx = 0; 1631 + swq->slot_idx = hwq->prod; 1632 + swq->slots = slots; 1727 1633 swq->wr_id = wqe->wr_id; 1728 1634 swq->type = wqe->type; 1729 1635 swq->flags = wqe->flags; 1636 + swq->start_psn = sq->psn & BTH_PSN_MASK; 1730 1637 if (qp->sig_type) 1731 1638 swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; 1732 - swq->start_psn = sq->psn & BTH_PSN_MASK; 1733 1639 1734 - hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); 1735 - memset(hw_sq_send_hdr, 0, sq->wqe_size); 1736 - 1737 - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { 1738 - /* Copy the inline data */ 1739 - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { 1740 - dev_warn(&sq->hwq.pdev->dev, 1741 - "Inline data length > 96 detected\n"); 1742 - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; 1743 - } else { 1744 - data_len = wqe->inline_len; 1745 - } 1746 - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); 1747 - wqe_size16 = 
(data_len + 15) >> 4; 1748 - } else { 1749 - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; 1750 - i < wqe->num_sge; i++, hw_sge++) { 1751 - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); 1752 - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); 1753 - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); 1754 - data_len += wqe->sg_list[i].size; 1755 - } 1756 - /* Each SGE entry = 1 WQE size16 */ 1757 - wqe_size16 = wqe->num_sge; 1758 - /* HW requires wqe size has room for atleast one SGE even if 1759 - * none was supplied by ULP 1760 - */ 1761 - if (!wqe->num_sge) 1762 - wqe_size16++; 1640 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1641 + sch_handler = true; 1642 + dev_dbg(&hwq->pdev->dev, 1643 + "%s Error QP. Scheduling for poll_cq\n", __func__); 1644 + goto queue_err; 1763 1645 } 1764 1646 1647 + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1648 + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1649 + memset(base_hdr, 0, sizeof(struct sq_sge)); 1650 + memset(ext_hdr, 0, sizeof(struct sq_sge)); 1651 + 1652 + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) 1653 + /* Copy the inline data */ 1654 + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); 1655 + else 1656 + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, 1657 + &idx); 1658 + if (data_len < 0) 1659 + goto queue_err; 1765 1660 /* Specifics */ 1766 1661 switch (wqe->type) { 1767 1662 case BNXT_QPLIB_SWQE_TYPE_SEND: 1768 1663 if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { 1664 + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; 1665 + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; 1769 1666 /* Assemble info for Raw Ethertype QPs */ 1770 - struct sq_send_raweth_qp1 *sqe = 1771 - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; 1772 1667 1773 1668 sqe->wqe_type = wqe->type; 1774 1669 sqe->flags = wqe->flags; 1775 - sqe->wqe_size = wqe_size16 + 1776 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1670 + sqe->wqe_size = wqe_sz; 1777 1671 sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); 
1778 1672 sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); 1779 1673 sqe->length = cpu_to_le32(data_len); 1780 - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & 1674 + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & 1781 1675 SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << 1782 1676 SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); 1783 1677 ··· 1783 1685 case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: 1784 1686 case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: 1785 1687 { 1786 - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; 1688 + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; 1689 + struct sq_send_hdr *sqe = base_hdr; 1787 1690 1788 1691 sqe->wqe_type = wqe->type; 1789 1692 sqe->flags = wqe->flags; 1790 - sqe->wqe_size = wqe_size16 + 1791 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1792 - sqe->inv_key_or_imm_data = cpu_to_le32( 1793 - wqe->send.inv_key); 1693 + sqe->wqe_size = wqe_sz; 1694 + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); 1794 1695 if (qp->type == CMDQ_CREATE_QP_TYPE_UD || 1795 1696 qp->type == CMDQ_CREATE_QP_TYPE_GSI) { 1796 1697 sqe->q_key = cpu_to_le32(wqe->send.q_key); 1797 - sqe->dst_qp = cpu_to_le32( 1798 - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); 1799 1698 sqe->length = cpu_to_le32(data_len); 1800 - sqe->avid = cpu_to_le32(wqe->send.avid & 1801 - SQ_SEND_AVID_MASK); 1802 1699 sq->psn = (sq->psn + 1) & BTH_PSN_MASK; 1700 + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & 1701 + SQ_SEND_DST_QP_MASK); 1702 + ext_sqe->avid = cpu_to_le32(wqe->send.avid & 1703 + SQ_SEND_AVID_MASK); 1803 1704 } else { 1804 1705 sqe->length = cpu_to_le32(data_len); 1805 - sqe->dst_qp = 0; 1806 - sqe->avid = 0; 1807 1706 if (qp->mtu) 1808 1707 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1809 1708 if (!pkt_num) ··· 1813 1718 case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: 1814 1719 case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: 1815 1720 { 1816 - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; 1721 + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; 1722 + struct 
sq_rdma_hdr *sqe = base_hdr; 1817 1723 1818 1724 sqe->wqe_type = wqe->type; 1819 1725 sqe->flags = wqe->flags; 1820 - sqe->wqe_size = wqe_size16 + 1821 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1726 + sqe->wqe_size = wqe_sz; 1822 1727 sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); 1823 1728 sqe->length = cpu_to_le32((u32)data_len); 1824 - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); 1825 - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); 1729 + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); 1730 + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); 1826 1731 if (qp->mtu) 1827 1732 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1828 1733 if (!pkt_num) ··· 1833 1738 case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: 1834 1739 case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: 1835 1740 { 1836 - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; 1741 + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; 1742 + struct sq_atomic_hdr *sqe = base_hdr; 1837 1743 1838 1744 sqe->wqe_type = wqe->type; 1839 1745 sqe->flags = wqe->flags; 1840 1746 sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); 1841 1747 sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); 1842 - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); 1843 - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); 1748 + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); 1749 + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); 1844 1750 if (qp->mtu) 1845 1751 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1846 1752 if (!pkt_num) ··· 1851 1755 } 1852 1756 case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: 1853 1757 { 1854 - struct sq_localinvalidate *sqe = 1855 - (struct sq_localinvalidate *)hw_sq_send_hdr; 1758 + struct sq_localinvalidate *sqe = base_hdr; 1856 1759 1857 1760 sqe->wqe_type = wqe->type; 1858 1761 sqe->flags = wqe->flags; ··· 1861 1766 } 1862 1767 case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: 1863 1768 { 1864 - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; 1769 + struct sq_fr_pmr_ext_hdr 
*ext_sqe = ext_hdr; 1770 + struct sq_fr_pmr_hdr *sqe = base_hdr; 1865 1771 1866 1772 sqe->wqe_type = wqe->type; 1867 1773 sqe->flags = wqe->flags; ··· 1886 1790 wqe->frmr.pbl_ptr[i] = cpu_to_le64( 1887 1791 wqe->frmr.page_list[i] | 1888 1792 PTU_PTE_VALID); 1889 - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); 1890 - sqe->va = cpu_to_le64(wqe->frmr.va); 1793 + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); 1794 + ext_sqe->va = cpu_to_le64(wqe->frmr.va); 1891 1795 1892 1796 break; 1893 1797 } 1894 1798 case BNXT_QPLIB_SWQE_TYPE_BIND_MW: 1895 1799 { 1896 - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; 1800 + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; 1801 + struct sq_bind_hdr *sqe = base_hdr; 1897 1802 1898 1803 sqe->wqe_type = wqe->type; 1899 1804 sqe->flags = wqe->flags; ··· 1903 1806 (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0); 1904 1807 sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); 1905 1808 sqe->l_key = cpu_to_le32(wqe->bind.r_key); 1906 - sqe->va = cpu_to_le64(wqe->bind.va); 1907 - temp32 = cpu_to_le32(wqe->bind.length); 1908 - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); 1809 + ext_sqe->va = cpu_to_le64(wqe->bind.va); 1810 + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); 1909 1811 break; 1910 1812 } 1911 1813 default: ··· 1913 1817 goto done; 1914 1818 } 1915 1819 swq->next_psn = sq->psn & BTH_PSN_MASK; 1916 - if (qp->type == CMDQ_CREATE_QP_TYPE_RC) 1917 - bnxt_qplib_fill_psn_search(qp, wqe, swq); 1820 + bnxt_qplib_fill_psn_search(qp, wqe, swq); 1918 1821 queue_err: 1919 - if (sch_handler) { 1920 - /* Store the ULP info in the software structures */ 1921 - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); 1922 - swq = &sq->swq[sw_prod]; 1923 - swq->wr_id = wqe->wr_id; 1924 - swq->type = wqe->type; 1925 - swq->flags = wqe->flags; 1926 - if (qp->sig_type) 1927 - swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; 1928 - swq->start_psn = sq->psn & BTH_PSN_MASK; 1929 - } 1930 - sq->hwq.prod++; 1822 + bnxt_qplib_swq_mod_start(sq, 
wqe_idx); 1823 + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); 1931 1824 qp->wqe_cnt++; 1932 - 1933 1825 done: 1934 1826 if (sch_handler) { 1935 1827 nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); ··· 1927 1843 INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); 1928 1844 queue_work(qp->scq->nq->cqn_wq, &nq_work->work); 1929 1845 } else { 1930 - dev_err(&sq->hwq.pdev->dev, 1846 + dev_err(&hwq->pdev->dev, 1931 1847 "FP: Failed to allocate SQ nq_work!\n"); 1932 1848 rc = -ENOMEM; 1933 1849 } ··· 1947 1863 { 1948 1864 struct bnxt_qplib_nq_work *nq_work = NULL; 1949 1865 struct bnxt_qplib_q *rq = &qp->rq; 1866 + struct rq_wqe_hdr *base_hdr; 1867 + struct rq_ext_hdr *ext_hdr; 1868 + struct bnxt_qplib_hwq *hwq; 1869 + struct bnxt_qplib_swq *swq; 1950 1870 bool sch_handler = false; 1951 - struct sq_sge *hw_sge; 1952 - struct rq_wqe *rqe; 1953 - int i, rc = 0; 1954 - u32 sw_prod; 1871 + u16 wqe_sz, idx; 1872 + u32 wqe_idx; 1873 + int rc = 0; 1955 1874 1956 - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1957 - sch_handler = true; 1958 - dev_dbg(&rq->hwq.pdev->dev, 1959 - "%s: Error QP. 
Scheduling for poll_cq\n", __func__); 1960 - goto queue_err; 1875 + hwq = &rq->hwq; 1876 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { 1877 + dev_err(&hwq->pdev->dev, 1878 + "QPLIB: FP: QP (0x%x) is in the 0x%x state", 1879 + qp->id, qp->state); 1880 + rc = -EINVAL; 1881 + goto done; 1961 1882 } 1962 - if (bnxt_qplib_queue_full(rq)) { 1963 - dev_err(&rq->hwq.pdev->dev, 1883 + 1884 + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { 1885 + dev_err(&hwq->pdev->dev, 1964 1886 "FP: QP (0x%x) RQ is full!\n", qp->id); 1965 1887 rc = -EINVAL; 1966 1888 goto done; 1967 1889 } 1968 - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); 1969 - rq->swq[sw_prod].wr_id = wqe->wr_id; 1970 1890 1971 - rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); 1972 - memset(rqe, 0, rq->wqe_size); 1891 + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); 1892 + swq->wr_id = wqe->wr_id; 1893 + swq->slots = rq->dbinfo.max_slot; 1973 1894 1974 - /* Calculate wqe_size16 and data_len */ 1975 - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; 1976 - i < wqe->num_sge; i++, hw_sge++) { 1977 - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); 1978 - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); 1979 - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); 1895 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1896 + sch_handler = true; 1897 + dev_dbg(&hwq->pdev->dev, 1898 + "%s: Error QP. 
Scheduling for poll_cq\n", __func__); 1899 + goto queue_err; 1980 1900 } 1981 - rqe->wqe_type = wqe->type; 1982 - rqe->flags = wqe->flags; 1983 - rqe->wqe_size = wqe->num_sge + 1984 - ((offsetof(typeof(*rqe), data) + 15) >> 4); 1985 - /* HW requires wqe size has room for atleast one SGE even if none 1986 - * was supplied by ULP 1987 - */ 1988 - if (!wqe->num_sge) 1989 - rqe->wqe_size++; 1990 1901 1991 - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ 1992 - rqe->wr_id[0] = cpu_to_le32(sw_prod); 1902 + idx = 0; 1903 + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1904 + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1905 + memset(base_hdr, 0, sizeof(struct sq_sge)); 1906 + memset(ext_hdr, 0, sizeof(struct sq_sge)); 1907 + wqe_sz = (sizeof(struct rq_wqe_hdr) + 1908 + wqe->num_sge * sizeof(struct sq_sge)) >> 4; 1909 + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); 1910 + if (!wqe->num_sge) { 1911 + struct sq_sge *sge; 1993 1912 1913 + sge = bnxt_qplib_get_prod_qe(hwq, idx++); 1914 + sge->size = 0; 1915 + wqe_sz++; 1916 + } 1917 + base_hdr->wqe_type = wqe->type; 1918 + base_hdr->flags = wqe->flags; 1919 + base_hdr->wqe_size = wqe_sz; 1920 + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); 1994 1921 queue_err: 1995 - if (sch_handler) { 1996 - /* Store the ULP info in the software structures */ 1997 - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); 1998 - rq->swq[sw_prod].wr_id = wqe->wr_id; 1999 - } 2000 - 2001 - rq->hwq.prod++; 1922 + bnxt_qplib_swq_mod_start(rq, wqe_idx); 1923 + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); 1924 + done: 2002 1925 if (sch_handler) { 2003 1926 nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); 2004 1927 if (nq_work) { ··· 2014 1923 INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); 2015 1924 queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); 2016 1925 } else { 2017 - dev_err(&rq->hwq.pdev->dev, 1926 + dev_err(&hwq->pdev->dev, 2018 1927 "FP: Failed to allocate RQ nq_work!\n"); 2019 1928 rc = -ENOMEM; 2020 1929 } 2021 1930 } 2022 - 
done: 1931 + 2023 1932 return rc; 2024 1933 } 2025 1934 ··· 2117 2026 static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, 2118 2027 struct bnxt_qplib_cqe **pcqe, int *budget) 2119 2028 { 2120 - u32 sw_prod, sw_cons; 2121 2029 struct bnxt_qplib_cqe *cqe; 2030 + u32 start, last; 2122 2031 int rc = 0; 2123 2032 2124 2033 /* Now complete all outstanding SQEs with FLUSHED_ERR */ 2125 - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); 2034 + start = sq->swq_start; 2126 2035 cqe = *pcqe; 2127 2036 while (*budget) { 2128 - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); 2129 - if (sw_cons == sw_prod) { 2037 + last = sq->swq_last; 2038 + if (start == last) 2130 2039 break; 2131 - } 2132 2040 /* Skip the FENCE WQE completions */ 2133 - if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { 2041 + if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) { 2134 2042 bnxt_qplib_cancel_phantom_processing(qp); 2135 2043 goto skip_compl; 2136 2044 } ··· 2137 2047 cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; 2138 2048 cqe->opcode = CQ_BASE_CQE_TYPE_REQ; 2139 2049 cqe->qp_handle = (u64)(unsigned long)qp; 2140 - cqe->wr_id = sq->swq[sw_cons].wr_id; 2050 + cqe->wr_id = sq->swq[last].wr_id; 2141 2051 cqe->src_qp = qp->id; 2142 - cqe->type = sq->swq[sw_cons].type; 2052 + cqe->type = sq->swq[last].type; 2143 2053 cqe++; 2144 2054 (*budget)--; 2145 2055 skip_compl: 2146 - sq->hwq.cons++; 2056 + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); 2057 + sq->swq_last = sq->swq[last].next_idx; 2147 2058 } 2148 2059 *pcqe = cqe; 2149 - if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod) 2060 + if (!(*budget) && sq->swq_last != start) 2150 2061 /* Out of budget */ 2151 2062 rc = -EAGAIN; 2152 2063 ··· 2158 2067 struct bnxt_qplib_cqe **pcqe, int *budget) 2159 2068 { 2160 2069 struct bnxt_qplib_cqe *cqe; 2161 - u32 sw_prod, sw_cons; 2162 - int rc = 0; 2070 + u32 start, last; 2163 2071 int opcode = 0; 2072 + int rc = 0; 2164 2073 2165 2074 switch (qp->type) { 2166 2075 
case CMDQ_CREATE_QP1_TYPE_GSI: ··· 2176 2085 } 2177 2086 2178 2087 /* Flush the rest of the RQ */ 2179 - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); 2088 + start = rq->swq_start; 2180 2089 cqe = *pcqe; 2181 2090 while (*budget) { 2182 - sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq); 2183 - if (sw_cons == sw_prod) 2091 + last = rq->swq_last; 2092 + if (last == start) 2184 2093 break; 2185 2094 memset(cqe, 0, sizeof(*cqe)); 2186 2095 cqe->status = 2187 2096 CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR; 2188 2097 cqe->opcode = opcode; 2189 2098 cqe->qp_handle = (unsigned long)qp; 2190 - cqe->wr_id = rq->swq[sw_cons].wr_id; 2099 + cqe->wr_id = rq->swq[last].wr_id; 2191 2100 cqe++; 2192 2101 (*budget)--; 2193 - rq->hwq.cons++; 2102 + bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); 2103 + rq->swq_last = rq->swq[last].next_idx; 2194 2104 } 2195 2105 *pcqe = cqe; 2196 - if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod) 2106 + if (!*budget && rq->swq_last != start) 2197 2107 /* Out of budget */ 2198 2108 rc = -EAGAIN; 2199 2109 ··· 2217 2125 * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 2218 2126 */ 2219 2127 static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, 2220 - u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) 2128 + u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) 2221 2129 { 2222 2130 u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; 2223 2131 struct bnxt_qplib_q *sq = &qp->sq; ··· 2230 2138 2231 2139 /* Normal mode */ 2232 2140 /* Check for the psn_search marking before completing */ 2233 - swq = &sq->swq[sw_sq_cons]; 2141 + swq = &sq->swq[swq_last]; 2234 2142 if (swq->psn_search && 2235 2143 le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) { 2236 2144 /* Unmark */ ··· 2239 2147 & ~0x80000000); 2240 2148 dev_dbg(&cq->hwq.pdev->dev, 2241 2149 "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n", 2242 - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); 2150 + cq_cons, qp->id, swq_last, 
cqe_sq_cons); 2243 2151 sq->condition = true; 2244 2152 sq->send_phantom = true; 2245 2153 ··· 2276 2184 le64_to_cpu 2277 2185 (peek_req_hwcqe->qp_handle)); 2278 2186 peek_sq = &peek_qp->sq; 2279 - peek_sq_cons_idx = HWQ_CMP(le16_to_cpu( 2280 - peek_req_hwcqe->sq_cons_idx) - 1 2281 - , &sq->hwq); 2187 + peek_sq_cons_idx = 2188 + ((le16_to_cpu( 2189 + peek_req_hwcqe->sq_cons_idx) 2190 + - 1) % sq->max_wqe); 2282 2191 /* If the hwcqe's sq's wr_id matches */ 2283 2192 if (peek_sq == sq && 2284 2193 sq->swq[peek_sq_cons_idx].wr_id == ··· 2307 2214 } 2308 2215 dev_err(&cq->hwq.pdev->dev, 2309 2216 "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", 2310 - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); 2217 + cq_cons, qp->id, swq_last, cqe_sq_cons); 2311 2218 rc = -EINVAL; 2312 2219 } 2313 2220 out: ··· 2319 2226 struct bnxt_qplib_cqe **pcqe, int *budget, 2320 2227 u32 cq_cons, struct bnxt_qplib_qp **lib_qp) 2321 2228 { 2322 - u32 sw_sq_cons, cqe_sq_cons; 2323 2229 struct bnxt_qplib_swq *swq; 2324 2230 struct bnxt_qplib_cqe *cqe; 2325 2231 struct bnxt_qplib_qp *qp; 2326 2232 struct bnxt_qplib_q *sq; 2233 + u32 cqe_sq_cons; 2327 2234 int rc = 0; 2328 2235 2329 2236 qp = (struct bnxt_qplib_qp *)((unsigned long) ··· 2335 2242 } 2336 2243 sq = &qp->sq; 2337 2244 2338 - cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); 2339 - if (cqe_sq_cons > sq->hwq.max_elements) { 2340 - dev_err(&cq->hwq.pdev->dev, 2341 - "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", 2342 - cqe_sq_cons, sq->hwq.max_elements); 2343 - return -EINVAL; 2344 - } 2345 - 2245 + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe; 2346 2246 if (qp->sq.flushed) { 2347 2247 dev_dbg(&cq->hwq.pdev->dev, 2348 2248 "%s: QP in Flush QP = %p\n", __func__, qp); ··· 2347 2261 */ 2348 2262 cqe = *pcqe; 2349 2263 while (*budget) { 2350 - sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); 2351 - if (sw_sq_cons == cqe_sq_cons) 2264 + if (sq->swq_last == 
cqe_sq_cons) 2352 2265 /* Done */ 2353 2266 break; 2354 2267 2355 - swq = &sq->swq[sw_sq_cons]; 2268 + swq = &sq->swq[sq->swq_last]; 2356 2269 memset(cqe, 0, sizeof(*cqe)); 2357 2270 cqe->opcode = CQ_BASE_CQE_TYPE_REQ; 2358 2271 cqe->qp_handle = (u64)(unsigned long)qp; ··· 2365 2280 * of the request being signaled or not, it must complete with 2366 2281 * the hwcqe error status 2367 2282 */ 2368 - if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons && 2283 + if (swq->next_idx == cqe_sq_cons && 2369 2284 hwcqe->status != CQ_REQ_STATUS_OK) { 2370 2285 cqe->status = hwcqe->status; 2371 2286 dev_err(&cq->hwq.pdev->dev, 2372 2287 "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", 2373 - sw_sq_cons, cqe->wr_id, cqe->status); 2288 + sq->swq_last, cqe->wr_id, cqe->status); 2374 2289 cqe++; 2375 2290 (*budget)--; 2376 2291 bnxt_qplib_mark_qp_error(qp); ··· 2378 2293 bnxt_qplib_add_flush_qp(qp); 2379 2294 } else { 2380 2295 /* Before we complete, do WA 9060 */ 2381 - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, 2296 + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, 2382 2297 cqe_sq_cons)) { 2383 2298 *lib_qp = qp; 2384 2299 goto out; ··· 2390 2305 } 2391 2306 } 2392 2307 skip: 2393 - sq->hwq.cons++; 2308 + bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); 2309 + sq->swq_last = swq->next_idx; 2394 2310 if (sq->single) 2395 2311 break; 2396 2312 } 2397 2313 out: 2398 2314 *pcqe = cqe; 2399 - if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) { 2315 + if (sq->swq_last != cqe_sq_cons) { 2400 2316 /* Out of budget */ 2401 2317 rc = -EAGAIN; 2402 2318 goto done; ··· 2472 2386 (*budget)--; 2473 2387 *pcqe = cqe; 2474 2388 } else { 2389 + struct bnxt_qplib_swq *swq; 2390 + 2475 2391 rq = &qp->rq; 2476 - if (wr_id_idx >= rq->hwq.max_elements) { 2392 + if (wr_id_idx > (rq->max_wqe - 1)) { 2477 2393 dev_err(&cq->hwq.pdev->dev, 2478 2394 "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", 2479 - wr_id_idx, rq->hwq.max_elements); 2395 + wr_id_idx, rq->max_wqe); 2480 2396 
return -EINVAL; 2481 2397 } 2482 - cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2398 + if (wr_id_idx != rq->swq_last) 2399 + return -EINVAL; 2400 + swq = &rq->swq[rq->swq_last]; 2401 + cqe->wr_id = swq->wr_id; 2483 2402 cqe++; 2484 2403 (*budget)--; 2485 - rq->hwq.cons++; 2404 + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); 2405 + rq->swq_last = swq->next_idx; 2486 2406 *pcqe = cqe; 2487 2407 2488 2408 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { ··· 2559 2467 (*budget)--; 2560 2468 *pcqe = cqe; 2561 2469 } else { 2470 + struct bnxt_qplib_swq *swq; 2471 + 2562 2472 rq = &qp->rq; 2563 - if (wr_id_idx >= rq->hwq.max_elements) { 2473 + if (wr_id_idx > (rq->max_wqe - 1)) { 2564 2474 dev_err(&cq->hwq.pdev->dev, 2565 2475 "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", 2566 - wr_id_idx, rq->hwq.max_elements); 2476 + wr_id_idx, rq->max_wqe); 2567 2477 return -EINVAL; 2568 2478 } 2569 2479 2570 - cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2480 + if (rq->swq_last != wr_id_idx) 2481 + return -EINVAL; 2482 + swq = &rq->swq[rq->swq_last]; 2483 + cqe->wr_id = swq->wr_id; 2571 2484 cqe++; 2572 2485 (*budget)--; 2573 - rq->hwq.cons++; 2486 + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); 2487 + rq->swq_last = swq->next_idx; 2574 2488 *pcqe = cqe; 2575 2489 2576 2490 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { ··· 2667 2569 (*budget)--; 2668 2570 *pcqe = cqe; 2669 2571 } else { 2572 + struct bnxt_qplib_swq *swq; 2573 + 2670 2574 rq = &qp->rq; 2671 - if (wr_id_idx >= rq->hwq.max_elements) { 2575 + if (wr_id_idx > (rq->max_wqe - 1)) { 2672 2576 dev_err(&cq->hwq.pdev->dev, 2673 2577 "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", 2674 - wr_id_idx, rq->hwq.max_elements); 2578 + wr_id_idx, rq->max_wqe); 2675 2579 return -EINVAL; 2676 2580 } 2677 - cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2581 + if (rq->swq_last != wr_id_idx) 2582 + return -EINVAL; 2583 + swq = &rq->swq[rq->swq_last]; 2584 + cqe->wr_id = swq->wr_id; 2678 2585 cqe++; 2679 2586 (*budget)--; 2680 - 
rq->hwq.cons++; 2587 + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); 2588 + rq->swq_last = swq->next_idx; 2681 2589 *pcqe = cqe; 2682 2590 2683 2591 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { ··· 2705 2601 struct bnxt_qplib_qp *qp; 2706 2602 struct bnxt_qplib_q *sq, *rq; 2707 2603 struct bnxt_qplib_cqe *cqe; 2708 - u32 sw_cons = 0, cqe_cons; 2604 + u32 swq_last = 0, cqe_cons; 2709 2605 int rc = 0; 2710 2606 2711 2607 /* Check the Status */ ··· 2731 2627 cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); 2732 2628 if (cqe_cons == 0xFFFF) 2733 2629 goto do_rq; 2734 - 2735 - if (cqe_cons > sq->hwq.max_elements) { 2736 - dev_err(&cq->hwq.pdev->dev, 2737 - "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", 2738 - cqe_cons, sq->hwq.max_elements); 2739 - goto do_rq; 2740 - } 2630 + cqe_cons %= sq->max_wqe; 2741 2631 2742 2632 if (qp->sq.flushed) { 2743 2633 dev_dbg(&cq->hwq.pdev->dev, ··· 2745 2647 */ 2746 2648 cqe = *pcqe; 2747 2649 while (*budget) { 2748 - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); 2749 - if (sw_cons == cqe_cons) 2650 + swq_last = sq->swq_last; 2651 + if (swq_last == cqe_cons) 2750 2652 break; 2751 - if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { 2653 + if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { 2752 2654 memset(cqe, 0, sizeof(*cqe)); 2753 2655 cqe->status = CQ_REQ_STATUS_OK; 2754 2656 cqe->opcode = CQ_BASE_CQE_TYPE_REQ; 2755 2657 cqe->qp_handle = (u64)(unsigned long)qp; 2756 2658 cqe->src_qp = qp->id; 2757 - cqe->wr_id = sq->swq[sw_cons].wr_id; 2758 - cqe->type = sq->swq[sw_cons].type; 2659 + cqe->wr_id = sq->swq[swq_last].wr_id; 2660 + cqe->type = sq->swq[swq_last].type; 2759 2661 cqe++; 2760 2662 (*budget)--; 2761 2663 } 2762 - sq->hwq.cons++; 2664 + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); 2665 + sq->swq_last = sq->swq[swq_last].next_idx; 2763 2666 } 2764 2667 *pcqe = cqe; 2765 - if (!(*budget) && sw_cons != cqe_cons) { 2668 + if (!(*budget) && swq_last != cqe_cons) { 2766 2669 
/* Out of budget */ 2767 2670 rc = -EAGAIN; 2768 2671 goto sq_done; ··· 2775 2676 cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx); 2776 2677 if (cqe_cons == 0xFFFF) { 2777 2678 goto done; 2778 - } else if (cqe_cons > rq->hwq.max_elements) { 2679 + } else if (cqe_cons > rq->max_wqe - 1) { 2779 2680 dev_err(&cq->hwq.pdev->dev, 2780 2681 "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", 2781 - cqe_cons, rq->hwq.max_elements); 2682 + cqe_cons, rq->max_wqe); 2782 2683 goto done; 2783 2684 } 2784 2685
+122 -5
drivers/infiniband/hw/bnxt_re/qplib_fp.h
··· 39 39 #ifndef __BNXT_QPLIB_FP_H__ 40 40 #define __BNXT_QPLIB_FP_H__ 41 41 42 + /* Few helper structures temporarily defined here 43 + * should get rid of these when roce_hsi.h is updated 44 + * in original code base 45 + */ 46 + struct sq_ud_ext_hdr { 47 + __le32 dst_qp; 48 + __le32 avid; 49 + __le64 rsvd; 50 + }; 51 + 52 + struct sq_raw_ext_hdr { 53 + __le32 cfa_meta; 54 + __le32 rsvd0; 55 + __le64 rsvd1; 56 + }; 57 + 58 + struct sq_rdma_ext_hdr { 59 + __le64 remote_va; 60 + __le32 remote_key; 61 + __le32 rsvd; 62 + }; 63 + 64 + struct sq_atomic_ext_hdr { 65 + __le64 swap_data; 66 + __le64 cmp_data; 67 + }; 68 + 69 + struct sq_fr_pmr_ext_hdr { 70 + __le64 pblptr; 71 + __le64 va; 72 + }; 73 + 74 + struct sq_bind_ext_hdr { 75 + __le64 va; 76 + __le32 length_lo; 77 + __le32 length_hi; 78 + }; 79 + 80 + struct rq_ext_hdr { 81 + __le64 rsvd1; 82 + __le64 rsvd2; 83 + }; 84 + 85 + /* Helper structures end */ 86 + 42 87 struct bnxt_qplib_srq { 43 88 struct bnxt_qplib_pd *pd; 44 89 struct bnxt_qplib_dpi *dpi; ··· 119 74 u8 flags; 120 75 u32 start_psn; 121 76 u32 next_psn; 77 + u32 slot_idx; 78 + u8 slots; 122 79 struct sq_psn_search *psn_search; 123 80 struct sq_psn_search_ext *psn_ext; 124 81 }; ··· 260 213 u32 phantom_cqe_cnt; 261 214 u32 next_cq_cons; 262 215 bool flushed; 216 + u32 swq_start; 217 + u32 swq_last; 263 218 }; 264 219 265 220 struct bnxt_qplib_qp { ··· 273 224 u32 id; 274 225 u8 type; 275 226 u8 sig_type; 276 - u32 modify_flags; 227 + u8 wqe_mode; 277 228 u8 state; 278 229 u8 cur_qp_state; 230 + u64 modify_flags; 279 231 u32 max_inline_data; 280 232 u32 mtu; 281 233 u8 path_mtu; ··· 350 300 (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ 351 301 !((raw_cons) & (cp_bit))) 352 302 353 - static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) 303 + static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, 304 + u8 slots) 354 305 { 355 - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), 356 - &qplib_q->hwq) == 
HWQ_CMP(qplib_q->hwq.cons, 357 - &qplib_q->hwq); 306 + struct bnxt_qplib_hwq *hwq; 307 + int avail; 308 + 309 + hwq = &que->hwq; 310 + /* False full is possible, retrying post-send makes sense */ 311 + avail = hwq->cons - hwq->prod; 312 + if (hwq->cons <= hwq->prod) 313 + avail += hwq->depth; 314 + return avail <= slots; 358 315 } 359 316 360 317 struct bnxt_qplib_cqe { ··· 546 489 struct bnxt_qplib_cqe *cqe, 547 490 int num_cqes); 548 491 void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp); 492 + 493 + static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx) 494 + { 495 + u32 idx; 496 + 497 + idx = que->swq_start; 498 + if (swq_idx) 499 + *swq_idx = idx; 500 + return &que->swq[idx]; 501 + } 502 + 503 + static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) 504 + { 505 + que->swq_start = que->swq[idx].next_idx; 506 + } 507 + 508 + static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) 509 + { 510 + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); 511 + } 512 + 513 + static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) 514 + { 515 + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 516 + que->max_wqe : bnxt_qplib_get_depth(que); 517 + } 518 + 519 + static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) 520 + { 521 + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 522 + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; 523 + } 524 + 525 + static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) 526 + { 527 + return (wqe_size / sizeof(struct sq_sge)); 528 + } 529 + 530 + static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) 531 + { 532 + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 533 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. 534 + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. 535 + * when 8916 is disabled. 
536 + */ 537 + return (delta * wqe_bytes) / sizeof(struct sq_sge); 538 + } 539 + 540 + static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) 541 + { 542 + u16 size = 0; 543 + int indx; 544 + 545 + for (indx = 0; indx < wqe->num_sge; indx++) 546 + size += wqe->sg_list[indx].size; 547 + if (size > max) 548 + size = max; 549 + 550 + return size; 551 + } 549 552 #endif /* __BNXT_QPLIB_FP_H__ */
+46 -12
drivers/infiniband/hw/bnxt_re/qplib_res.h
··· 41 41 42 42 extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; 43 43 44 + #define CHIP_NUM_57508 0x1750 45 + #define CHIP_NUM_57504 0x1751 46 + #define CHIP_NUM_57502 0x1752 47 + 48 + enum bnxt_qplib_wqe_mode { 49 + BNXT_QPLIB_WQE_MODE_STATIC = 0x00, 50 + BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01, 51 + BNXT_QPLIB_WQE_MODE_INVALID = 0x02 52 + }; 53 + 54 + struct bnxt_qplib_drv_modes { 55 + u8 wqe_mode; 56 + /* Other modes to follow here */ 57 + }; 58 + 59 + struct bnxt_qplib_chip_ctx { 60 + u16 chip_num; 61 + u8 chip_rev; 62 + u8 chip_metal; 63 + struct bnxt_qplib_drv_modes modes; 64 + }; 65 + 44 66 #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) 45 67 #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) 46 68 #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) ··· 163 141 u32 cons; /* raw */ 164 142 u8 cp_bit; 165 143 u8 is_user; 144 + u64 *pad_pg; 145 + u32 pad_stride; 146 + u32 pad_pgofft; 166 147 }; 167 148 168 149 struct bnxt_qplib_db_info { ··· 173 148 void __iomem *priv_db; 174 149 struct bnxt_qplib_hwq *hwq; 175 150 u32 xid; 151 + u32 max_slot; 176 152 }; 177 153 178 154 /* Tables */ ··· 256 230 u64 hwrm_intf_ver; 257 231 }; 258 232 259 - struct bnxt_qplib_chip_ctx { 260 - u16 chip_num; 261 - u8 chip_rev; 262 - u8 chip_metal; 263 - }; 264 - 265 - #define CHIP_NUM_57508 0x1750 266 - #define CHIP_NUM_57504 0x1751 267 - #define CHIP_NUM_57502 0x1752 268 - 269 233 struct bnxt_qplib_res { 270 234 struct pci_dev *pdev; 271 235 struct bnxt_qplib_chip_ctx *cctx; ··· 333 317 return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); 334 318 } 335 319 320 + static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) 321 + { 322 + idx += hwq->prod; 323 + if (idx >= hwq->depth) 324 + idx -= hwq->depth; 325 + return bnxt_qplib_get_qe(hwq, idx, NULL); 326 + } 327 + 336 328 #define to_bnxt_qplib(ptr, type, member) \ 337 329 container_of(ptr, type, member) 338 330 ··· 375 351 struct bnxt_qplib_ctx *ctx, 376 352 bool virt_fn, bool 
is_p5); 377 353 354 + static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) 355 + { 356 + hwq->prod = (hwq->prod + cnt) % hwq->depth; 357 + } 358 + 359 + static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, 360 + u32 cnt) 361 + { 362 + hwq->cons = (hwq->cons + cnt) % hwq->depth; 363 + } 364 + 378 365 static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, 379 366 bool arm) 380 367 { ··· 418 383 419 384 key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; 420 385 key <<= 32; 421 - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & 422 - DBC_DBC_INDEX_MASK; 386 + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; 423 387 writeq(key, info->db); 424 388 } 425 389
+1
drivers/infiniband/hw/bnxt_re/roce_hsi.h
··· 1126 1126 #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL 1127 1127 #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL 1128 1128 #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL 1129 + #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL 1129 1130 u8 type; 1130 1131 #define CMDQ_CREATE_QP_TYPE_RC 0x2UL 1131 1132 #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
+5 -4
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 980 980 void c4iw_qp_add_ref(struct ib_qp *qp); 981 981 void c4iw_qp_rem_ref(struct ib_qp *qp); 982 982 struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 983 - u32 max_num_sg, struct ib_udata *udata); 983 + u32 max_num_sg); 984 984 int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 985 985 unsigned int *sg_offset); 986 986 int c4iw_dealloc_mw(struct ib_mw *mw); ··· 1053 1053 const struct ib_recv_wr **bad_wr); 1054 1054 struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); 1055 1055 1056 - typedef int c4iw_restrack_func(struct sk_buff *msg, 1057 - struct rdma_restrack_entry *res); 1058 - extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; 1056 + int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); 1057 + int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); 1058 + int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); 1059 + int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id); 1059 1060 1060 1061 #endif
+1 -2
drivers/infiniband/hw/cxgb4/mem.c
··· 399 399 mmid = stag >> 8; 400 400 mhp->ibmr.rkey = mhp->ibmr.lkey = stag; 401 401 mhp->ibmr.length = mhp->attr.len; 402 - mhp->ibmr.iova = mhp->attr.va_fbo; 403 402 mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); 404 403 pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); 405 404 return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); ··· 690 691 } 691 692 692 693 struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 693 - u32 max_num_sg, struct ib_udata *udata) 694 + u32 max_num_sg) 694 695 { 695 696 struct c4iw_dev *rhp; 696 697 struct c4iw_pd *php;
+3 -19
drivers/infiniband/hw/cxgb4/provider.c
··· 236 236 return 0; 237 237 } 238 238 239 - static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 240 - u16 *pkey) 241 - { 242 - pr_debug("ibdev %p\n", ibdev); 243 - *pkey = 0; 244 - return 0; 245 - } 246 - 247 239 static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, 248 240 union ib_gid *gid) 249 241 { ··· 309 317 IB_PORT_DEVICE_MGMT_SUP | 310 318 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; 311 319 props->gid_tbl_len = 1; 312 - props->pkey_tbl_len = 1; 313 320 props->max_msg_sz = -1; 314 321 315 322 return ret; ··· 430 439 if (err) 431 440 return err; 432 441 433 - immutable->pkey_tbl_len = attr.pkey_tbl_len; 434 442 immutable->gid_tbl_len = attr.gid_tbl_len; 435 443 436 444 return 0; ··· 446 456 FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), 447 457 FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), 448 458 FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); 449 - } 450 - 451 - static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) 452 - { 453 - return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && 454 - c4iw_restrack_funcs[res->type]) ? 455 - c4iw_restrack_funcs[res->type](msg, res) : 0; 456 459 } 457 460 458 461 static const struct ib_device_ops c4iw_dev_ops = { ··· 468 485 .destroy_cq = c4iw_destroy_cq, 469 486 .destroy_qp = c4iw_destroy_qp, 470 487 .destroy_srq = c4iw_destroy_srq, 471 - .fill_res_entry = fill_res_entry, 488 + .fill_res_cq_entry = c4iw_fill_res_cq_entry, 489 + .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry, 490 + .fill_res_mr_entry = c4iw_fill_res_mr_entry, 472 491 .get_dev_fw_str = get_dev_fw_str, 473 492 .get_dma_mr = c4iw_get_dma_mr, 474 493 .get_hw_stats = c4iw_get_mib, ··· 493 508 .post_srq_recv = c4iw_post_srq_recv, 494 509 .query_device = c4iw_query_device, 495 510 .query_gid = c4iw_query_gid, 496 - .query_pkey = c4iw_query_pkey, 497 511 .query_port = c4iw_query_port, 498 512 .query_qp = c4iw_ib_query_qp, 499 513 .reg_user_mr = c4iw_reg_user_mr,
+5 -19
drivers/infiniband/hw/cxgb4/restrack.c
··· 134 134 return -EMSGSIZE; 135 135 } 136 136 137 - static int fill_res_qp_entry(struct sk_buff *msg, 138 - struct rdma_restrack_entry *res) 137 + int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) 139 138 { 140 - struct ib_qp *ibqp = container_of(res, struct ib_qp, res); 141 139 struct t4_swsqe *fsp = NULL, *lsp = NULL; 142 140 struct c4iw_qp *qhp = to_c4iw_qp(ibqp); 143 141 u16 first_sq_idx = 0, last_sq_idx = 0; ··· 193 195 struct c4iw_ep ep; 194 196 }; 195 197 196 - static int fill_res_ep_entry(struct sk_buff *msg, 197 - struct rdma_restrack_entry *res) 198 + int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, 199 + struct rdma_cm_id *cm_id) 198 200 { 199 - struct rdma_cm_id *cm_id = rdma_res_to_id(res); 200 201 struct nlattr *table_attr; 201 202 struct c4iw_ep_common *epcp; 202 203 struct c4iw_listen_ep *listen_ep = NULL; ··· 369 372 return -EMSGSIZE; 370 373 } 371 374 372 - static int fill_res_cq_entry(struct sk_buff *msg, 373 - struct rdma_restrack_entry *res) 375 + int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq) 374 376 { 375 - struct ib_cq *ibcq = container_of(res, struct ib_cq, res); 376 377 struct c4iw_cq *chp = to_c4iw_cq(ibcq); 377 378 struct nlattr *table_attr; 378 379 struct t4_cqe hwcqes[2]; ··· 428 433 return -EMSGSIZE; 429 434 } 430 435 431 - static int fill_res_mr_entry(struct sk_buff *msg, 432 - struct rdma_restrack_entry *res) 436 + int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) 433 437 { 434 - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); 435 438 struct c4iw_mr *mhp = to_c4iw_mr(ibmr); 436 439 struct c4iw_dev *dev = mhp->rhp; 437 440 u32 stag = mhp->attr.stag; ··· 485 492 err: 486 493 return -EMSGSIZE; 487 494 } 488 - 489 - c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { 490 - [RDMA_RESTRACK_QP] = fill_res_qp_entry, 491 - [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, 492 - [RDMA_RESTRACK_CQ] = fill_res_cq_entry, 493 - [RDMA_RESTRACK_MR] = fill_res_mr_entry, 494 
- };
+13 -2
drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
··· 606 606 /* Number of sub-CQs to be created for each CQ */ 607 607 u16 sub_cqs_per_cq; 608 608 609 - /* MBZ */ 610 - u16 reserved; 609 + /* Minimum number of WQEs per SQ */ 610 + u16 min_sq_depth; 611 611 612 612 /* Maximum number of SGEs (buffers) allowed for a single send WQE */ 613 613 u16 max_wr_send_sges; ··· 632 632 633 633 /* Maximum number of SGEs for a single RDMA read WQE */ 634 634 u16 max_wr_rdma_sges; 635 + 636 + /* 637 + * Maximum number of bytes that can be written to SQ between two 638 + * consecutive doorbells (in units of 64B). Driver must ensure that only 639 + * complete WQEs are written to queue before issuing a doorbell. 640 + * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can 641 + * be written to SQ between two consecutive doorbells. max_tx_batch=11 642 + * and WQE size = 128B, means up to 5 WQEs can be written to SQ between 643 + * two consecutive doorbells. Zero means unlimited. 644 + */ 645 + u16 max_tx_batch; 635 646 }; 636 647 637 648 struct efa_admin_feature_aenq_desc {
+2
drivers/infiniband/hw/efa/efa_com_cmd.c
··· 480 480 result->max_llq_size = resp.u.queue_attr.max_llq_size; 481 481 result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; 482 482 result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; 483 + result->max_tx_batch = resp.u.queue_attr.max_tx_batch; 484 + result->min_sq_depth = resp.u.queue_attr.min_sq_depth; 483 485 484 486 err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); 485 487 if (err) {
+2
drivers/infiniband/hw/efa/efa_com_cmd.h
··· 127 127 u16 max_sq_sge; 128 128 u16 max_rq_sge; 129 129 u16 max_wr_rdma_sge; 130 + u16 max_tx_batch; 131 + u16 min_sq_depth; 130 132 u8 db_bar; 131 133 }; 132 134
+4 -2
drivers/infiniband/hw/efa/efa_main.c
··· 12 12 13 13 #include "efa.h" 14 14 15 - #define PCI_DEV_ID_EFA_VF 0xefa0 15 + #define PCI_DEV_ID_EFA0_VF 0xefa0 16 + #define PCI_DEV_ID_EFA1_VF 0xefa1 16 17 17 18 static const struct pci_device_id efa_pci_tbl[] = { 18 - { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, 19 + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, 20 + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, 19 21 { } 20 22 }; 21 23
+42
drivers/infiniband/hw/efa/efa_verbs.c
··· 1502 1502 return efa_com_dealloc_uar(&dev->edev, &params); 1503 1503 } 1504 1504 1505 + #define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ 1506 + (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ 1507 + NULL : #_attr) 1508 + 1509 + static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, 1510 + const struct efa_ibv_alloc_ucontext_cmd *cmd) 1511 + { 1512 + struct efa_dev *dev = to_edev(ibucontext->device); 1513 + char *attr_str; 1514 + 1515 + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, 1516 + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) 1517 + goto err; 1518 + 1519 + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, 1520 + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, 1521 + attr_str)) 1522 + goto err; 1523 + 1524 + return 0; 1525 + 1526 + err: 1527 + ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", 1528 + attr_str); 1529 + return -EOPNOTSUPP; 1530 + } 1531 + 1505 1532 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) 1506 1533 { 1507 1534 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1508 1535 struct efa_dev *dev = to_edev(ibucontext->device); 1509 1536 struct efa_ibv_alloc_ucontext_resp resp = {}; 1537 + struct efa_ibv_alloc_ucontext_cmd cmd = {}; 1510 1538 struct efa_com_alloc_uar_result result; 1511 1539 int err; 1512 1540 ··· 1542 1514 * it's fine if the driver does not know all request fields, 1543 1515 * we will ack input fields in our response. 
1544 1516 */ 1517 + 1518 + err = ib_copy_from_udata(&cmd, udata, 1519 + min(sizeof(cmd), udata->inlen)); 1520 + if (err) { 1521 + ibdev_dbg(&dev->ibdev, 1522 + "Cannot copy udata for alloc_ucontext\n"); 1523 + goto err_out; 1524 + } 1525 + 1526 + err = efa_user_comp_handshake(ibucontext, &cmd); 1527 + if (err) 1528 + goto err_out; 1545 1529 1546 1530 err = efa_com_alloc_uar(&dev->edev, &result); 1547 1531 if (err) ··· 1566 1526 resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; 1567 1527 resp.inline_buf_size = dev->dev_attr.inline_buf_size; 1568 1528 resp.max_llq_size = dev->dev_attr.max_llq_size; 1529 + resp.max_tx_batch = dev->dev_attr.max_tx_batch; 1530 + resp.min_sq_wr = dev->dev_attr.min_sq_depth; 1569 1531 1570 1532 if (udata && udata->outlen) { 1571 1533 err = ib_copy_to_udata(udata, &resp,
+14 -13
drivers/infiniband/hw/hfi1/chip.c
··· 7317 7317 case 1: return OPA_LINK_WIDTH_1X; 7318 7318 case 2: return OPA_LINK_WIDTH_2X; 7319 7319 case 3: return OPA_LINK_WIDTH_3X; 7320 + case 4: return OPA_LINK_WIDTH_4X; 7320 7321 default: 7321 7322 dd_dev_info(dd, "%s: invalid width %d, using 4\n", 7322 7323 __func__, width); 7323 - /* fall through */ 7324 - case 4: return OPA_LINK_WIDTH_4X; 7324 + return OPA_LINK_WIDTH_4X; 7325 7325 } 7326 7326 } 7327 7327 ··· 7376 7376 case 0: 7377 7377 dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G; 7378 7378 break; 7379 + case 1: 7380 + dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; 7381 + break; 7379 7382 default: 7380 7383 dd_dev_err(dd, 7381 7384 "%s: unexpected max rate %d, using 25Gb\n", 7382 7385 __func__, (int)max_rate); 7383 - /* fall through */ 7384 - case 1: 7385 7386 dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; 7386 7387 break; 7387 7388 } ··· 12879 12878 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) 12880 12879 { 12881 12880 switch (chip_lstate) { 12882 - default: 12883 - dd_dev_err(dd, 12884 - "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", 12885 - chip_lstate); 12886 - /* fall through */ 12887 12881 case LSTATE_DOWN: 12888 12882 return IB_PORT_DOWN; 12889 12883 case LSTATE_INIT: ··· 12887 12891 return IB_PORT_ARMED; 12888 12892 case LSTATE_ACTIVE: 12889 12893 return IB_PORT_ACTIVE; 12894 + default: 12895 + dd_dev_err(dd, 12896 + "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", 12897 + chip_lstate); 12898 + return IB_PORT_DOWN; 12890 12899 } 12891 12900 } 12892 12901 ··· 12899 12898 { 12900 12899 /* look at the HFI meta-states only */ 12901 12900 switch (chip_pstate & 0xf0) { 12902 - default: 12903 - dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", 12904 - chip_pstate); 12905 - /* fall through */ 12906 12901 case PLS_DISABLED: 12907 12902 return IB_PORTPHYSSTATE_DISABLED; 12908 12903 case PLS_OFFLINE: ··· 12911 12914 return IB_PORTPHYSSTATE_LINKUP; 12912 12915 case PLS_PHYTEST: 12913 
12916 return IB_PORTPHYSSTATE_PHY_TEST; 12917 + default: 12918 + dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", 12919 + chip_pstate); 12920 + return IB_PORTPHYSSTATE_DISABLED; 12914 12921 } 12915 12922 } 12916 12923
-16
drivers/infiniband/hw/hfi1/firmware.c
··· 1868 1868 2; 1869 1869 break; 1870 1870 case PLATFORM_CONFIG_RX_PRESET_TABLE: 1871 - /* fall through */ 1872 1871 case PLATFORM_CONFIG_TX_PRESET_TABLE: 1873 - /* fall through */ 1874 1872 case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: 1875 - /* fall through */ 1876 1873 case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: 1877 1874 pcfgcache->config_tables[table_type].num_table = 1878 1875 table_length_dwords; ··· 1887 1890 /* metadata table */ 1888 1891 switch (table_type) { 1889 1892 case PLATFORM_CONFIG_SYSTEM_TABLE: 1890 - /* fall through */ 1891 1893 case PLATFORM_CONFIG_PORT_TABLE: 1892 - /* fall through */ 1893 1894 case PLATFORM_CONFIG_RX_PRESET_TABLE: 1894 - /* fall through */ 1895 1895 case PLATFORM_CONFIG_TX_PRESET_TABLE: 1896 - /* fall through */ 1897 1896 case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: 1898 - /* fall through */ 1899 1897 case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: 1900 1898 break; 1901 1899 default: ··· 2019 2027 2020 2028 switch (table) { 2021 2029 case PLATFORM_CONFIG_SYSTEM_TABLE: 2022 - /* fall through */ 2023 2030 case PLATFORM_CONFIG_PORT_TABLE: 2024 - /* fall through */ 2025 2031 case PLATFORM_CONFIG_RX_PRESET_TABLE: 2026 - /* fall through */ 2027 2032 case PLATFORM_CONFIG_TX_PRESET_TABLE: 2028 - /* fall through */ 2029 2033 case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: 2030 - /* fall through */ 2031 2034 case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: 2032 2035 if (field && field < platform_config_table_limits[table]) 2033 2036 src_ptr = ··· 2125 2138 pcfgcache->config_tables[table_type].table; 2126 2139 break; 2127 2140 case PLATFORM_CONFIG_RX_PRESET_TABLE: 2128 - /* fall through */ 2129 2141 case PLATFORM_CONFIG_TX_PRESET_TABLE: 2130 - /* fall through */ 2131 2142 case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: 2132 - /* fall through */ 2133 2143 case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: 2134 2144 src_ptr = pcfgcache->config_tables[table_type].table; 2135 2145
+4 -5
drivers/infiniband/hw/hfi1/mad.c
··· 721 721 /* Bad mkey not a violation below level 2 */ 722 722 if (ibp->rvp.mkeyprot < 2) 723 723 break; 724 - /* fall through */ 724 + fallthrough; 725 725 case IB_MGMT_METHOD_SET: 726 726 case IB_MGMT_METHOD_TRAP_REPRESS: 727 727 if (ibp->rvp.mkey_violations != 0xFFFF) ··· 1272 1272 case IB_PORT_NOP: 1273 1273 if (phys_state == IB_PORTPHYSSTATE_NOP) 1274 1274 break; 1275 - /* FALLTHROUGH */ 1275 + fallthrough; 1276 1276 case IB_PORT_DOWN: 1277 1277 if (phys_state == IB_PORTPHYSSTATE_NOP) { 1278 1278 link_state = HLS_DN_DOWNDEF; ··· 2300 2300 * can be changed from the default values 2301 2301 */ 2302 2302 case OPA_VLARB_PREEMPT_ELEMENTS: 2303 - /* FALLTHROUGH */ 2304 2303 case OPA_VLARB_PREEMPT_MATRIX: 2305 2304 smp->status |= IB_SMP_UNSUP_METH_ATTR; 2306 2305 break; ··· 4169 4170 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; 4170 4171 if (ibp->rvp.port_cap_flags & IB_PORT_SM) 4171 4172 return IB_MAD_RESULT_SUCCESS; 4172 - /* FALLTHROUGH */ 4173 + fallthrough; 4173 4174 default: 4174 4175 smp->status |= IB_SMP_UNSUP_METH_ATTR; 4175 4176 ret = reply((struct ib_mad_hdr *)smp); ··· 4239 4240 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; 4240 4241 if (ibp->rvp.port_cap_flags & IB_PORT_SM) 4241 4242 return IB_MAD_RESULT_SUCCESS; 4242 - /* FALLTHROUGH */ 4243 + fallthrough; 4243 4244 default: 4244 4245 smp->status |= IB_SMP_UNSUP_METH_ATTR; 4245 4246 ret = reply((struct ib_mad_hdr *)smp);
+15 -7
drivers/infiniband/hw/hfi1/pcie.c
··· 306 306 ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); 307 307 if (ret) { 308 308 dd_dev_err(dd, "Unable to read from PCI config\n"); 309 - return ret; 309 + return pcibios_err_to_errno(ret); 310 310 } 311 311 312 312 if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) { ··· 334 334 return 0; 335 335 } 336 336 337 - /* restore command and BARs after a reset has wiped them out */ 337 + /** 338 + * Restore command and BARs after a reset has wiped them out 339 + * 340 + * Returns 0 on success, otherwise a negative error value 341 + */ 338 342 int restore_pci_variables(struct hfi1_devdata *dd) 339 343 { 340 - int ret = 0; 344 + int ret; 341 345 342 346 ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); 343 347 if (ret) ··· 390 386 391 387 error: 392 388 dd_dev_err(dd, "Unable to write to PCI config\n"); 393 - return ret; 389 + return pcibios_err_to_errno(ret); 394 390 } 395 391 396 - /* Save BARs and command to rewrite after device reset */ 392 + /** 393 + * Save BARs and command to rewrite after device reset 394 + * 395 + * Returns 0 on success, otherwise a negative error value 396 + */ 397 397 int save_pci_variables(struct hfi1_devdata *dd) 398 398 { 399 - int ret = 0; 399 + int ret; 400 400 401 401 ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, 402 402 &dd->pcibar0); ··· 449 441 450 442 error: 451 443 dd_dev_err(dd, "Unable to read from PCI config\n"); 452 - return ret; 444 + return pcibios_err_to_errno(ret); 453 445 } 454 446 455 447 /*
+1 -1
drivers/infiniband/hw/hfi1/pio.c
··· 86 86 switch (op) { 87 87 case PSC_GLOBAL_ENABLE: 88 88 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 89 - /* Fall through */ 89 + fallthrough; 90 90 case PSC_DATA_VL_ENABLE: 91 91 mask = 0; 92 92 for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
+6 -6
drivers/infiniband/hw/hfi1/pio_copy.c
··· 191 191 switch (n) { 192 192 case 7: 193 193 *dest++ = *src++; 194 - /* fall through */ 194 + fallthrough; 195 195 case 6: 196 196 *dest++ = *src++; 197 - /* fall through */ 197 + fallthrough; 198 198 case 5: 199 199 *dest++ = *src++; 200 - /* fall through */ 200 + fallthrough; 201 201 case 4: 202 202 *dest++ = *src++; 203 - /* fall through */ 203 + fallthrough; 204 204 case 3: 205 205 *dest++ = *src++; 206 - /* fall through */ 206 + fallthrough; 207 207 case 2: 208 208 *dest++ = *src++; 209 - /* fall through */ 209 + fallthrough; 210 210 case 1: 211 211 *dest++ = *src++; 212 212 /* fall through */
+5 -5
drivers/infiniband/hw/hfi1/platform.c
··· 668 668 669 669 /* active optical cables only */ 670 670 switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { 671 - case 0x0 ... 0x9: /* fallthrough */ 672 - case 0xC: /* fallthrough */ 671 + case 0x0 ... 0x9: fallthrough; 672 + case 0xC: fallthrough; 673 673 case 0xE: 674 674 /* active AOC */ 675 675 power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); ··· 899 899 900 900 *ptr_tuning_method = OPA_PASSIVE_TUNING; 901 901 break; 902 - case 0x0 ... 0x9: /* fallthrough */ 903 - case 0xC: /* fallthrough */ 902 + case 0x0 ... 0x9: fallthrough; 903 + case 0xC: fallthrough; 904 904 case 0xE: 905 905 ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset, 906 906 ptr_total_atten); ··· 909 909 910 910 *ptr_tuning_method = OPA_ACTIVE_TUNING; 911 911 break; 912 - case 0xD: /* fallthrough */ 912 + case 0xD: fallthrough; 913 913 case 0xF: 914 914 default: 915 915 dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
+1 -1
drivers/infiniband/hw/hfi1/qp.c
··· 312 312 switch (qp->ibqp.qp_type) { 313 313 case IB_QPT_RC: 314 314 hfi1_setup_tid_rdma_wqe(qp, wqe); 315 - /* fall through */ 315 + fallthrough; 316 316 case IB_QPT_UC: 317 317 if (wqe->length > 0x80000000U) 318 318 return -EINVAL;
-14
drivers/infiniband/hw/hfi1/qp.h
··· 113 113 } 114 114 115 115 /** 116 - * hfi1_create_qp - create a queue pair for a device 117 - * @ibpd: the protection domain who's device we create the queue pair for 118 - * @init_attr: the attributes of the queue pair 119 - * @udata: user data for libibverbs.so 120 - * 121 - * Returns the queue pair on success, otherwise returns an errno. 122 - * 123 - * Called by the ib_create_qp() core verbs function. 124 - */ 125 - struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, 126 - struct ib_qp_init_attr *init_attr, 127 - struct ib_udata *udata); 128 - 129 - /** 130 116 * hfi1_qp_wakeup - wake up on the indicated event 131 117 * @qp: the QP 132 118 * @flag: flag the qp on which the qp is stalled
+2 -2
drivers/infiniband/hw/hfi1/qsfp.c
··· 231 231 break; 232 232 case 2: 233 233 offset_bytes[1] = (offset >> 8) & 0xff; 234 - /* fall through */ 234 + fallthrough; 235 235 case 1: 236 236 num_msgs = 2; 237 237 offset_bytes[0] = offset & 0xff; ··· 279 279 break; 280 280 case 2: 281 281 offset_bytes[1] = (offset >> 8) & 0xff; 282 - /* fall through */ 282 + fallthrough; 283 283 case 1: 284 284 num_msgs = 2; 285 285 offset_bytes[0] = offset & 0xff;
+12 -13
drivers/infiniband/hw/hfi1/rc.c
··· 141 141 case OP(RDMA_READ_RESPONSE_ONLY): 142 142 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 143 143 release_rdma_sge_mr(e); 144 - /* FALLTHROUGH */ 144 + fallthrough; 145 145 case OP(ATOMIC_ACKNOWLEDGE): 146 146 /* 147 147 * We can increment the tail pointer now that the last ··· 160 160 qp->s_acked_ack_queue = next; 161 161 qp->s_tail_ack_queue = next; 162 162 trace_hfi1_rsp_make_rc_ack(qp, e->psn); 163 - /* FALLTHROUGH */ 163 + fallthrough; 164 164 case OP(SEND_ONLY): 165 165 case OP(ACKNOWLEDGE): 166 166 /* Check for no next entry in the queue. */ ··· 267 267 268 268 case OP(RDMA_READ_RESPONSE_FIRST): 269 269 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 270 - /* FALLTHROUGH */ 270 + fallthrough; 271 271 case OP(RDMA_READ_RESPONSE_MIDDLE): 272 272 ps->s_txreq->ss = &qp->s_ack_rdma_sge; 273 273 ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; ··· 881 881 goto bail; 882 882 } 883 883 qp->s_num_rd_atomic++; 884 - 885 - /* FALLTHROUGH */ 884 + fallthrough; 886 885 case IB_WR_OPFN: 887 886 if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) 888 887 qp->s_lsn++; ··· 945 946 * See restart_rc(). 946 947 */ 947 948 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); 948 - /* FALLTHROUGH */ 949 + fallthrough; 949 950 case OP(SEND_FIRST): 950 951 qp->s_state = OP(SEND_MIDDLE); 951 - /* FALLTHROUGH */ 952 + fallthrough; 952 953 case OP(SEND_MIDDLE): 953 954 bth2 = mask_psn(qp->s_psn++); 954 955 ss = &qp->s_sge; ··· 990 991 * See restart_rc(). 
991 992 */ 992 993 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); 993 - /* FALLTHROUGH */ 994 + fallthrough; 994 995 case OP(RDMA_WRITE_FIRST): 995 996 qp->s_state = OP(RDMA_WRITE_MIDDLE); 996 - /* FALLTHROUGH */ 997 + fallthrough; 997 998 case OP(RDMA_WRITE_MIDDLE): 998 999 bth2 = mask_psn(qp->s_psn++); 999 1000 ss = &qp->s_sge; ··· 2900 2901 if (!ret) 2901 2902 goto rnr_nak; 2902 2903 qp->r_rcv_len = 0; 2903 - /* FALLTHROUGH */ 2904 + fallthrough; 2904 2905 case OP(SEND_MIDDLE): 2905 2906 case OP(RDMA_WRITE_MIDDLE): 2906 2907 send_middle: ··· 2940 2941 goto no_immediate_data; 2941 2942 if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) 2942 2943 goto send_last_inv; 2943 - /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ 2944 + fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */ 2944 2945 case OP(SEND_LAST_WITH_IMMEDIATE): 2945 2946 send_last_imm: 2946 2947 wc.ex.imm_data = ohdr->u.imm_data; ··· 2956 2957 goto send_last; 2957 2958 case OP(RDMA_WRITE_LAST): 2958 2959 copy_last = rvt_is_user_qp(qp); 2959 - /* fall through */ 2960 + fallthrough; 2960 2961 case OP(SEND_LAST): 2961 2962 no_immediate_data: 2962 2963 wc.wc_flags = 0; ··· 3009 3010 3010 3011 case OP(RDMA_WRITE_ONLY): 3011 3012 copy_last = rvt_is_user_qp(qp); 3012 - /* fall through */ 3013 + fallthrough; 3013 3014 case OP(RDMA_WRITE_FIRST): 3014 3015 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): 3015 3016 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+4 -5
drivers/infiniband/hw/hfi1/sdma.c
··· 2584 2584 * 7220, e.g. 2585 2585 */ 2586 2586 ss->go_s99_running = 1; 2587 - /* fall through -- and start dma engine */ 2587 + fallthrough; /* and start dma engine */ 2588 2588 case sdma_event_e10_go_hw_start: 2589 2589 /* This reference means the state machine is started */ 2590 2590 sdma_get(&sde->state); ··· 2726 2726 case sdma_event_e70_go_idle: 2727 2727 break; 2728 2728 case sdma_event_e85_link_down: 2729 - /* fall through */ 2730 2729 case sdma_event_e80_hw_freeze: 2731 2730 sdma_set_state(sde, sdma_state_s80_hw_freeze); 2732 2731 atomic_dec(&sde->dd->sdma_unfreeze_count); ··· 3006 3007 case sdma_event_e60_hw_halted: 3007 3008 need_progress = 1; 3008 3009 sdma_err_progress_check_schedule(sde); 3009 - /* fall through */ 3010 + fallthrough; 3010 3011 case sdma_event_e90_sw_halted: 3011 3012 /* 3012 3013 * SW initiated halt does not perform engines ··· 3020 3021 break; 3021 3022 case sdma_event_e85_link_down: 3022 3023 ss->go_s99_running = 0; 3023 - /* fall through */ 3024 + fallthrough; 3024 3025 case sdma_event_e80_hw_freeze: 3025 3026 sdma_set_state(sde, sdma_state_s80_hw_freeze); 3026 3027 atomic_dec(&sde->dd->sdma_unfreeze_count); ··· 3251 3252 tx->num_desc++; 3252 3253 tx->descs[2].qw[0] = 0; 3253 3254 tx->descs[2].qw[1] = 0; 3254 - /* FALLTHROUGH */ 3255 + fallthrough; 3255 3256 case SDMA_AHG_APPLY_UPDATE2: 3256 3257 tx->num_desc++; 3257 3258 tx->descs[1].qw[0] = 0;
+2 -2
drivers/infiniband/hw/hfi1/tid_rdma.c
··· 3227 3227 case IB_WR_RDMA_READ: 3228 3228 if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE) 3229 3229 break; 3230 - /* fall through */ 3230 + fallthrough; 3231 3231 case IB_WR_TID_RDMA_READ: 3232 3232 switch (prev->wr.opcode) { 3233 3233 case IB_WR_RDMA_READ: ··· 5067 5067 if (priv->s_state == TID_OP(WRITE_REQ)) 5068 5068 hfi1_tid_rdma_restart_req(qp, wqe, &bth2); 5069 5069 priv->s_state = TID_OP(WRITE_DATA); 5070 - /* fall through */ 5070 + fallthrough; 5071 5071 5072 5072 case TID_OP(WRITE_DATA): 5073 5073 /*
+4 -4
drivers/infiniband/hw/hfi1/uc.c
··· 216 216 217 217 case OP(SEND_FIRST): 218 218 qp->s_state = OP(SEND_MIDDLE); 219 - /* FALLTHROUGH */ 219 + fallthrough; 220 220 case OP(SEND_MIDDLE): 221 221 len = qp->s_len; 222 222 if (len > pmtu) { ··· 241 241 242 242 case OP(RDMA_WRITE_FIRST): 243 243 qp->s_state = OP(RDMA_WRITE_MIDDLE); 244 - /* FALLTHROUGH */ 244 + fallthrough; 245 245 case OP(RDMA_WRITE_MIDDLE): 246 246 len = qp->s_len; 247 247 if (len > pmtu) { ··· 414 414 goto no_immediate_data; 415 415 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) 416 416 goto send_last_imm; 417 - /* FALLTHROUGH */ 417 + fallthrough; 418 418 case OP(SEND_MIDDLE): 419 419 /* Check for invalid length PMTU or posted rwqe len. */ 420 420 /* ··· 515 515 wc.ex.imm_data = ohdr->u.rc.imm_data; 516 516 goto rdma_last_imm; 517 517 } 518 - /* FALLTHROUGH */ 518 + fallthrough; 519 519 case OP(RDMA_WRITE_MIDDLE): 520 520 /* Check for invalid length PMTU or posted rwqe len. */ 521 521 if (unlikely(tlen != (hdrsize + pmtu + 4)))
+16 -15
drivers/infiniband/hw/hns/hns_roce_device.h
··· 37 37 38 38 #define DRV_NAME "hns_roce" 39 39 40 - /* hip08 is a pci device, it includes two version according pci version id */ 41 - #define PCI_REVISION_ID_HIP08_A 0x20 42 - #define PCI_REVISION_ID_HIP08_B 0x21 40 + /* hip08 is a pci device */ 41 + #define PCI_REVISION_ID_HIP08 0x21 43 42 44 43 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') 45 44 ··· 347 348 bool mtt_only; /* only alloc buffer-required MTT memory */ 348 349 }; 349 350 351 + struct hns_roce_hem_cfg { 352 + dma_addr_t root_ba; /* root BA table's address */ 353 + bool is_direct; /* addressing without BA table */ 354 + unsigned int ba_pg_shift; /* BA table page shift */ 355 + unsigned int buf_pg_shift; /* buffer page shift */ 356 + unsigned int buf_pg_count; /* buffer page count */ 357 + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; 358 + int region_count; 359 + }; 360 + 350 361 /* memory translate region */ 351 362 struct hns_roce_mtr { 352 363 struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ 353 364 struct ib_umem *umem; /* user space buffer */ 354 365 struct hns_roce_buf *kmem; /* kernel space buffer */ 355 - struct { 356 - dma_addr_t root_ba; /* root BA table's address */ 357 - bool is_direct; /* addressing without BA table */ 358 - unsigned int ba_pg_shift; /* BA table page shift */ 359 - unsigned int buf_pg_shift; /* buffer page shift */ 360 - int buf_pg_count; /* buffer page count */ 361 - struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; 362 - unsigned int region_count; 363 - } hem_cfg; /* config for hardware addressing */ 366 + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ 364 367 }; 365 368 366 369 struct hns_roce_mw { ··· 1193 1192 u64 virt_addr, int mr_access_flags, struct ib_pd *pd, 1194 1193 struct ib_udata *udata); 1195 1194 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1196 - u32 max_num_sg, struct ib_udata *udata); 1195 + u32 max_num_sg); 1197 1196 int 
hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 1198 1197 unsigned int *sg_offset); 1199 1198 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); ··· 1268 1267 int hns_roce_init(struct hns_roce_dev *hr_dev); 1269 1268 void hns_roce_exit(struct hns_roce_dev *hr_dev); 1270 1269 1271 - int hns_roce_fill_res_entry(struct sk_buff *msg, 1272 - struct rdma_restrack_entry *res); 1270 + int hns_roce_fill_res_cq_entry(struct sk_buff *msg, 1271 + struct ib_cq *ib_cq); 1273 1272 #endif /* _HNS_ROCE_DEVICE_H */
+3 -4
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
··· 2483 2483 u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) 2484 2484 { 2485 2485 struct ib_device *ibdev = &hr_dev->ib_dev; 2486 - int rq_pa_start; 2487 2486 int count; 2488 2487 2489 2488 count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); ··· 2490 2491 ibdev_err(ibdev, "Failed to find SQ ba\n"); 2491 2492 return -ENOBUFS; 2492 2493 } 2493 - rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; 2494 - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, 2495 - NULL); 2494 + 2495 + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, 2496 + 1, NULL); 2496 2497 if (!count) { 2497 2498 ibdev_err(ibdev, "Failed to find RQ ba\n"); 2498 2499 return -ENOBUFS;
+118 -135
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 91 91 } 92 92 93 93 static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 94 - void *wqe, const struct ib_reg_wr *wr) 94 + const struct ib_reg_wr *wr) 95 95 { 96 + struct hns_roce_wqe_frmr_seg *fseg = 97 + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); 96 98 struct hns_roce_mr *mr = to_hr_mr(wr->mr); 97 - struct hns_roce_wqe_frmr_seg *fseg = wqe; 98 99 u64 pbl_ba; 99 100 100 101 /* use ib_access_flags */ ··· 129 128 V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); 130 129 } 131 130 132 - static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, 131 + static void set_atomic_seg(const struct ib_send_wr *wr, 133 132 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 134 133 unsigned int valid_num_sge) 135 134 { 136 - struct hns_roce_wqe_atomic_seg *aseg; 135 + struct hns_roce_v2_wqe_data_seg *dseg = 136 + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); 137 + struct hns_roce_wqe_atomic_seg *aseg = 138 + (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg); 137 139 138 - set_data_seg_v2(wqe, wr->sg_list); 139 - aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg); 140 + set_data_seg_v2(dseg, wr->sg_list); 140 141 141 142 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 142 143 aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap); ··· 146 143 } else { 147 144 aseg->fetchadd_swap_data = 148 145 cpu_to_le64(atomic_wr(wr)->compare_add); 149 - aseg->cmp_data = 0; 146 + aseg->cmp_data = 0; 150 147 } 151 148 152 149 roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, ··· 179 176 180 177 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, 181 178 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 182 - void *wqe, unsigned int *sge_ind, 179 + unsigned int *sge_ind, 183 180 unsigned int valid_num_sge) 184 181 { 185 182 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 186 - struct hns_roce_v2_wqe_data_seg *dseg = wqe; 183 + struct hns_roce_v2_wqe_data_seg *dseg = 184 + (void *)rc_sq_wqe + 
sizeof(struct hns_roce_v2_rc_send_wqe); 187 185 struct ib_device *ibdev = &hr_dev->ib_dev; 188 186 struct hns_roce_qp *qp = to_hr_qp(ibqp); 187 + void *wqe = dseg; 189 188 int j = 0; 190 189 int i; 191 190 ··· 443 438 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, 444 439 owner_bit); 445 440 446 - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); 447 441 switch (wr->opcode) { 448 442 case IB_WR_RDMA_READ: 449 443 case IB_WR_RDMA_WRITE: ··· 455 451 rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); 456 452 break; 457 453 case IB_WR_REG_MR: 458 - set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr)); 454 + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); 459 455 break; 460 456 case IB_WR_ATOMIC_CMP_AND_SWP: 461 457 case IB_WR_ATOMIC_FETCH_AND_ADD: ··· 472 468 473 469 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || 474 470 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 475 - set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge); 471 + set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); 476 472 else if (wr->opcode != IB_WR_REG_MR) 477 473 ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, 478 - wqe, &curr_idx, valid_num_sge); 474 + &curr_idx, valid_num_sge); 479 475 480 476 *sge_idx = curr_idx; 481 477 ··· 1514 1510 1515 1511 req_a = (struct hns_roce_vf_res_a *)desc[0].data; 1516 1512 req_b = (struct hns_roce_vf_res_b *)desc[1].data; 1517 - memset(req_a, 0, sizeof(*req_a)); 1518 - memset(req_b, 0, sizeof(*req_b)); 1519 1513 for (i = 0; i < 2; i++) { 1520 1514 hns_roce_cmq_setup_basic_desc(&desc[i], 1521 1515 HNS_ROCE_OPC_ALLOC_VF_RES, false); ··· 1746 1744 caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; 1747 1745 caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; 1748 1746 1749 - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { 1750 - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | 1751 - HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | 1752 - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; 1747 + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | 1748 + 
HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | 1749 + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; 1753 1750 1754 - caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; 1755 - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; 1756 - caps->qpc_timer_ba_pg_sz = 0; 1757 - caps->qpc_timer_buf_pg_sz = 0; 1758 - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 1759 - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; 1760 - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; 1761 - caps->cqc_timer_ba_pg_sz = 0; 1762 - caps->cqc_timer_buf_pg_sz = 0; 1763 - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 1751 + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; 1752 + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; 1753 + caps->qpc_timer_ba_pg_sz = 0; 1754 + caps->qpc_timer_buf_pg_sz = 0; 1755 + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 1756 + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; 1757 + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; 1758 + caps->cqc_timer_ba_pg_sz = 0; 1759 + caps->cqc_timer_buf_pg_sz = 0; 1760 + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 1764 1761 1765 - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; 1766 - caps->sccc_ba_pg_sz = 0; 1767 - caps->sccc_buf_pg_sz = 0; 1768 - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; 1769 - } 1762 + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; 1763 + caps->sccc_ba_pg_sz = 0; 1764 + caps->sccc_buf_pg_sz = 0; 1765 + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; 1770 1766 } 1771 1767 1772 1768 static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, ··· 1995 1995 caps->srqc_bt_num, &caps->srqc_buf_pg_sz, 1996 1996 &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); 1997 1997 1998 - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { 1999 - caps->sccc_hop_num = ctx_hop_num; 2000 - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 2001 - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 1998 + caps->sccc_hop_num = ctx_hop_num; 1999 + caps->qpc_timer_hop_num = 
HNS_ROCE_HOP_NUM_0; 2000 + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; 2002 2001 2003 - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, 2004 - caps->sccc_hop_num, caps->sccc_bt_num, 2005 - &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, 2006 - HEM_TYPE_SCCC); 2007 - calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, 2008 - caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, 2009 - &caps->cqc_timer_buf_pg_sz, 2010 - &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); 2011 - } 2002 + calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, 2003 + caps->sccc_hop_num, caps->sccc_bt_num, 2004 + &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, 2005 + HEM_TYPE_SCCC); 2006 + calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, 2007 + caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, 2008 + &caps->cqc_timer_buf_pg_sz, 2009 + &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); 2012 2010 2013 2011 calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num, 2014 2012 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); ··· 2053 2055 return ret; 2054 2056 } 2055 2057 2056 - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { 2057 - ret = hns_roce_query_pf_timer_resource(hr_dev); 2058 - if (ret) { 2059 - dev_err(hr_dev->dev, 2060 - "Query pf timer resource fail, ret = %d.\n", 2061 - ret); 2062 - return ret; 2063 - } 2058 + ret = hns_roce_query_pf_timer_resource(hr_dev); 2059 + if (ret) { 2060 + dev_err(hr_dev->dev, 2061 + "failed to query pf timer resource, ret = %d.\n", ret); 2062 + return ret; 2063 + } 2064 2064 2065 - ret = hns_roce_set_vf_switch_param(hr_dev, 0); 2066 - if (ret) { 2067 - dev_err(hr_dev->dev, 2068 - "Set function switch param fail, ret = %d.\n", 2069 - ret); 2070 - return ret; 2071 - } 2065 + ret = hns_roce_set_vf_switch_param(hr_dev, 0); 2066 + if (ret) { 2067 + dev_err(hr_dev->dev, 2068 + "failed to set function switch param, ret = %d.\n", 2069 + ret); 2070 + return ret; 2072 2071 } 2073 2072 2074 2073 hr_dev->vendor_part_id = 
hr_dev->pci_dev->device; ··· 2331 2336 { 2332 2337 struct hns_roce_v2_priv *priv = hr_dev->priv; 2333 2338 2334 - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) 2335 - hns_roce_function_clear(hr_dev); 2339 + hns_roce_function_clear(hr_dev); 2336 2340 2337 2341 hns_roce_free_link_table(hr_dev, &priv->tpq); 2338 2342 hns_roce_free_link_table(hr_dev, &priv->tsq); ··· 3047 3053 IB_WC_RETRY_EXC_ERR }, 3048 3054 { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, 3049 3055 { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, 3056 + { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR} 3050 3057 }; 3051 3058 3052 3059 u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, ··· 3068 3073 ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); 3069 3074 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, 3070 3075 sizeof(*cqe), false); 3076 + 3077 + /* 3078 + * For hns ROCEE, GENERAL_ERR is an error type that is not defined in 3079 + * the standard protocol, the driver must ignore it and needn't to set 3080 + * the QP to an error state. 
3081 + */ 3082 + if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR) 3083 + return; 3071 3084 3072 3085 /* 3073 3086 * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets ··· 3173 3170 /* SQ corresponding to CQE */ 3174 3171 switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, 3175 3172 V2_CQE_BYTE_4_OPCODE_S) & 0x1f) { 3176 - case HNS_ROCE_SQ_OPCODE_SEND: 3173 + case HNS_ROCE_V2_WQE_OP_SEND: 3177 3174 wc->opcode = IB_WC_SEND; 3178 3175 break; 3179 - case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV: 3176 + case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV: 3180 3177 wc->opcode = IB_WC_SEND; 3181 3178 break; 3182 - case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM: 3179 + case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM: 3183 3180 wc->opcode = IB_WC_SEND; 3184 3181 wc->wc_flags |= IB_WC_WITH_IMM; 3185 3182 break; 3186 - case HNS_ROCE_SQ_OPCODE_RDMA_READ: 3183 + case HNS_ROCE_V2_WQE_OP_RDMA_READ: 3187 3184 wc->opcode = IB_WC_RDMA_READ; 3188 3185 wc->byte_len = le32_to_cpu(cqe->byte_cnt); 3189 3186 break; 3190 - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE: 3187 + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE: 3191 3188 wc->opcode = IB_WC_RDMA_WRITE; 3192 3189 break; 3193 - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM: 3190 + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: 3194 3191 wc->opcode = IB_WC_RDMA_WRITE; 3195 3192 wc->wc_flags |= IB_WC_WITH_IMM; 3196 3193 break; 3197 - case HNS_ROCE_SQ_OPCODE_LOCAL_INV: 3194 + case HNS_ROCE_V2_WQE_OP_LOCAL_INV: 3198 3195 wc->opcode = IB_WC_LOCAL_INV; 3199 3196 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 3200 3197 break; 3201 - case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP: 3198 + case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: 3202 3199 wc->opcode = IB_WC_COMP_SWAP; 3203 3200 wc->byte_len = 8; 3204 3201 break; 3205 - case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD: 3202 + case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: 3206 3203 wc->opcode = IB_WC_FETCH_ADD; 3207 3204 wc->byte_len = 8; 3208 3205 break; 3209 - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP: 3206 + case 
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: 3210 3207 wc->opcode = IB_WC_MASKED_COMP_SWAP; 3211 3208 wc->byte_len = 8; 3212 3209 break; 3213 - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD: 3210 + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD: 3214 3211 wc->opcode = IB_WC_MASKED_FETCH_ADD; 3215 3212 wc->byte_len = 8; 3216 3213 break; 3217 - case HNS_ROCE_SQ_OPCODE_FAST_REG_WR: 3214 + case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR: 3218 3215 wc->opcode = IB_WC_REG_MR; 3219 3216 break; 3220 - case HNS_ROCE_SQ_OPCODE_BIND_MW: 3217 + case HNS_ROCE_V2_WQE_OP_BIND_MW: 3221 3218 wc->opcode = IB_WC_REG_MR; 3222 3219 break; 3223 3220 default: ··· 3377 3374 return op + step_idx; 3378 3375 } 3379 3376 3377 + static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, 3378 + u32 hem_type, int step_idx) 3379 + { 3380 + struct hns_roce_cmd_mailbox *mailbox; 3381 + int ret; 3382 + int op; 3383 + 3384 + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); 3385 + if (op < 0) 3386 + return 0; 3387 + 3388 + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); 3389 + if (IS_ERR(mailbox)) 3390 + return PTR_ERR(mailbox); 3391 + 3392 + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, 3393 + 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); 3394 + 3395 + hns_roce_free_cmd_mailbox(hr_dev, mailbox); 3396 + 3397 + return ret; 3398 + } 3399 + 3380 3400 static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, 3381 3401 struct hns_roce_hem_table *table, int obj, 3382 3402 int step_idx) 3383 3403 { 3384 - struct hns_roce_cmd_mailbox *mailbox; 3385 3404 struct hns_roce_hem_iter iter; 3386 3405 struct hns_roce_hem_mhop mhop; 3387 3406 struct hns_roce_hem *hem; ··· 3415 3390 u64 bt_ba = 0; 3416 3391 u32 chunk_ba_num; 3417 3392 u32 hop_num; 3418 - int op; 3419 3393 3420 3394 if (!hns_roce_check_whether_mhop(hr_dev, table->type)) 3421 3395 return 0; ··· 3436 3412 hem_idx = i; 3437 3413 } 3438 3414 3439 - op = get_op_for_set_hem(hr_dev, table->type, step_idx); 3440 - if (op == -EINVAL) 3441 - return 0; 
3442 - 3443 - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); 3444 - if (IS_ERR(mailbox)) 3445 - return PTR_ERR(mailbox); 3446 - 3447 3415 if (table->type == HEM_TYPE_SCCC) 3448 3416 obj = mhop.l0_idx; 3449 3417 ··· 3444 3428 for (hns_roce_hem_first(hem, &iter); 3445 3429 !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { 3446 3430 bt_ba = hns_roce_hem_addr(&iter); 3447 - 3448 - /* configure the ba, tag, and op */ 3449 - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, 3450 - obj, 0, op, 3451 - HNS_ROCE_CMD_TIMEOUT_MSECS); 3431 + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, 3432 + step_idx); 3452 3433 } 3453 3434 } else { 3454 3435 if (step_idx == 0) ··· 3453 3440 else if (step_idx == 1 && hop_num == 2) 3454 3441 bt_ba = table->bt_l1_dma_addr[l1_idx]; 3455 3442 3456 - /* configure the ba, tag, and op */ 3457 - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, 3458 - 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); 3443 + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx); 3459 3444 } 3460 3445 3461 - hns_roce_free_cmd_mailbox(hr_dev, mailbox); 3462 3446 return ret; 3463 3447 } 3464 3448 ··· 3755 3745 } 3756 3746 } 3757 3747 3758 - static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, 3759 - struct hns_roce_qp *hr_qp, int mtt_cnt, 3760 - u32 page_size) 3761 - { 3762 - struct ib_device *ibdev = &hr_dev->ib_dev; 3763 - 3764 - if (hr_qp->rq.wqe_cnt < 1) 3765 - return true; 3766 - 3767 - if (mtt_cnt < 1) { 3768 - ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n", 3769 - hr_qp->qpn); 3770 - return false; 3771 - } 3772 - 3773 - if (mtt_cnt < MTT_MIN_COUNT && 3774 - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { 3775 - ibdev_err(ibdev, 3776 - "failed to find next RQWQE buf ba of QP(0x%lx)\n", 3777 - hr_qp->qpn); 3778 - return false; 3779 - } 3780 - 3781 - return true; 3782 - } 3783 - 3784 3748 static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, 3785 3749 struct hns_roce_qp *hr_qp, 3786 3750 struct hns_roce_v2_qp_context 
*context, 3787 3751 struct hns_roce_v2_qp_context *qpc_mask) 3788 3752 { 3789 - struct ib_qp *ibqp = &hr_qp->ibqp; 3790 3753 u64 mtts[MTT_MIN_COUNT] = { 0 }; 3791 3754 u64 wqe_sge_ba; 3792 - u32 page_size; 3793 3755 int count; 3794 3756 3795 3757 /* Search qp buf's mtts */ 3796 - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; 3797 - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 3798 - hr_qp->rq.offset / page_size, mtts, 3758 + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, 3799 3759 MTT_MIN_COUNT, &wqe_sge_ba); 3800 - if (!ibqp->srq) 3801 - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) 3802 - return -EINVAL; 3760 + if (hr_qp->rq.wqe_cnt && count < 1) { 3761 + ibdev_err(&hr_dev->ib_dev, 3762 + "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn); 3763 + return -EINVAL; 3764 + } 3803 3765 3804 3766 context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); 3805 3767 qpc_mask->wqe_sge_ba = 0; ··· 3873 3891 struct ib_device *ibdev = &hr_dev->ib_dev; 3874 3892 u64 sge_cur_blk = 0; 3875 3893 u64 sq_cur_blk = 0; 3876 - u32 page_size; 3877 3894 int count; 3878 3895 3879 3896 /* search qp buf's mtts */ ··· 3883 3902 return -EINVAL; 3884 3903 } 3885 3904 if (hr_qp->sge.sge_cnt > 0) { 3886 - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; 3887 3905 count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 3888 - hr_qp->sge.offset / page_size, 3906 + hr_qp->sge.offset, 3889 3907 &sge_cur_blk, 1, NULL); 3890 3908 if (count < 1) { 3891 3909 ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", ··· 4245 4265 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, 4246 4266 V2_QPC_BYTE_24_HOP_LIMIT_S, 0); 4247 4267 4248 - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp) 4268 + if (is_udp) 4249 4269 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4250 4270 V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); 4251 4271 else 4252 4272 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4253 4273 
V2_QPC_BYTE_24_TC_S, grh->traffic_class); 4274 + 4254 4275 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4255 4276 V2_QPC_BYTE_24_TC_S, 0); 4256 4277 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, ··· 4282 4301 [IB_QPS_RTR] = { [IB_QPS_RESET] = true, 4283 4302 [IB_QPS_RTS] = true, 4284 4303 [IB_QPS_ERR] = true }, 4285 - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, 4304 + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, 4305 + [IB_QPS_RTS] = true, 4306 + [IB_QPS_ERR] = true }, 4286 4307 [IB_QPS_SQD] = {}, 4287 4308 [IB_QPS_SQE] = {}, 4288 4309 [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
+2 -17
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 179 179 HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, 180 180 HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, 181 181 HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, 182 - HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc, 182 + HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, 183 183 HNS_ROCE_V2_WQE_OP_MASK = 0x1f, 184 - }; 185 - 186 - enum { 187 - HNS_ROCE_SQ_OPCODE_SEND = 0x0, 188 - HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, 189 - HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2, 190 - HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3, 191 - HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4, 192 - HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5, 193 - HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6, 194 - HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7, 195 - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8, 196 - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9, 197 - HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa, 198 - HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb, 199 - HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc, 200 184 }; 201 185 202 186 enum { ··· 214 230 HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, 215 231 HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, 216 232 HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, 233 + HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23, 217 234 218 235 HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, 219 236 };
+1 -1
drivers/infiniband/hw/hns/hns_roce_main.c
··· 428 428 .destroy_ah = hns_roce_destroy_ah, 429 429 .destroy_cq = hns_roce_destroy_cq, 430 430 .disassociate_ucontext = hns_roce_disassociate_ucontext, 431 - .fill_res_entry = hns_roce_fill_res_entry, 431 + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, 432 432 .get_dma_mr = hns_roce_get_dma_mr, 433 433 .get_link_layer = hns_roce_get_link_layer, 434 434 .get_port_immutable = hns_roce_port_immutable,
+125 -81
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 415 415 } 416 416 417 417 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 418 - u32 max_num_sg, struct ib_udata *udata) 418 + u32 max_num_sg) 419 419 { 420 420 struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); 421 421 struct device *dev = hr_dev->dev; ··· 871 871 int err; 872 872 int i; 873 873 874 + /* 875 + * Only use the first page address as root ba when hopnum is 0, this 876 + * is because the addresses of all pages are consecutive in this case. 877 + */ 878 + if (mtr->hem_cfg.is_direct) { 879 + mtr->hem_cfg.root_ba = pages[0]; 880 + return 0; 881 + } 882 + 874 883 for (i = 0; i < mtr->hem_cfg.region_count; i++) { 875 884 r = &mtr->hem_cfg.region[i]; 876 885 if (r->offset + r->count > page_cnt) { ··· 905 896 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, 906 897 int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) 907 898 { 899 + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; 900 + int start_index; 908 901 int mtt_count; 909 902 int total = 0; 910 903 __le64 *mtts; ··· 918 907 goto done; 919 908 920 909 /* no mtt memory in direct mode, so just return the buffer address */ 921 - if (mtr->hem_cfg.is_direct) { 922 - npage = offset; 923 - for (total = 0; total < mtt_max; total++, npage++) { 924 - addr = mtr->hem_cfg.root_ba + 925 - (npage << mtr->hem_cfg.buf_pg_shift); 910 + if (cfg->is_direct) { 911 + start_index = offset >> HNS_HW_PAGE_SHIFT; 912 + for (mtt_count = 0; mtt_count < cfg->region_count && 913 + total < mtt_max; mtt_count++) { 914 + npage = cfg->region[mtt_count].offset; 915 + if (npage < start_index) 916 + continue; 926 917 918 + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); 927 919 if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) 928 920 mtt_buf[total] = to_hr_hw_page_addr(addr); 929 921 else 930 922 mtt_buf[total] = addr; 923 + 924 + total++; 931 925 } 932 926 933 927 goto done; 934 928 } 935 929 930 + start_index = offset >> cfg->buf_pg_shift; 936 931 left = mtt_max; 937 932 while (left 
> 0) { 938 933 mtt_count = 0; 939 934 mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, 940 - offset + total, 935 + start_index + total, 941 936 &mtt_count, NULL); 942 937 if (!mtts || !mtt_count) 943 938 goto done; ··· 956 939 957 940 done: 958 941 if (base_addr) 959 - *base_addr = mtr->hem_cfg.root_ba; 942 + *base_addr = cfg->root_ba; 960 943 961 944 return total; 962 945 } 963 946 964 - /* convert buffer size to page index and page count */ 965 - static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, 966 - int page_cnt, 967 - struct hns_roce_buf_region *regions, 968 - int region_cnt, unsigned int page_shift) 947 + static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, 948 + struct hns_roce_buf_attr *attr, 949 + struct hns_roce_hem_cfg *cfg, 950 + unsigned int *buf_page_shift) 969 951 { 970 - unsigned int page_size = 1 << page_shift; 971 - int max_region = attr->region_count; 972 952 struct hns_roce_buf_region *r; 973 - unsigned int i = 0; 974 - int page_idx = 0; 953 + unsigned int page_shift = 0; 954 + int page_cnt = 0; 955 + size_t buf_size; 956 + int region_cnt; 975 957 976 - for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { 977 - r = &regions[i]; 978 - r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? 979 - 0 : attr->region[i].hopnum; 980 - r->offset = page_idx; 981 - r->count = DIV_ROUND_UP(attr->region[i].size, page_size); 982 - page_idx += r->count; 958 + if (cfg->is_direct) { 959 + buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; 960 + page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); 961 + /* 962 + * When HEM buffer use level-0 addressing, the page size equals 963 + * the buffer size, and the the page size = 4K * 2^N. 
964 + */ 965 + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); 966 + if (attr->region_count > 1) { 967 + cfg->buf_pg_count = page_cnt; 968 + page_shift = HNS_HW_PAGE_SHIFT; 969 + } else { 970 + cfg->buf_pg_count = 1; 971 + page_shift = cfg->buf_pg_shift; 972 + if (buf_size != 1 << page_shift) { 973 + ibdev_err(&hr_dev->ib_dev, 974 + "failed to check direct size %zu shift %d.\n", 975 + buf_size, page_shift); 976 + return -EINVAL; 977 + } 978 + } 979 + } else { 980 + page_shift = cfg->buf_pg_shift; 983 981 } 984 982 985 - return i; 983 + /* convert buffer size to page index and page count */ 984 + for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && 985 + region_cnt < attr->region_count && 986 + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { 987 + r = &cfg->region[region_cnt]; 988 + r->offset = page_cnt; 989 + buf_size = hr_hw_page_align(attr->region[region_cnt].size); 990 + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); 991 + page_cnt += r->count; 992 + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, 993 + r->count); 994 + } 995 + 996 + if (region_cnt < 1) { 997 + ibdev_err(&hr_dev->ib_dev, 998 + "failed to check mtr region count, pages = %d.\n", 999 + cfg->buf_pg_count); 1000 + return -ENOBUFS; 1001 + } 1002 + 1003 + cfg->region_count = region_cnt; 1004 + *buf_page_shift = page_shift; 1005 + 1006 + return page_cnt; 986 1007 } 987 1008 988 1009 /** 989 1010 * hns_roce_mtr_create - Create hns memory translate region. 
990 1011 * 991 1012 * @mtr: memory translate region 992 - * @init_attr: init attribute for creating mtr 993 - * @page_shift: page shift for multi-hop base address table 1013 + * @buf_attr: buffer attribute for creating mtr 1014 + * @ba_page_shift: page shift for multi-hop base address table 994 1015 * @udata: user space context, if it's NULL, means kernel space 995 1016 * @user_addr: userspace virtual address to start at 996 - * @buf_alloced: mtr has private buffer, true means need to alloc 997 1017 */ 998 1018 int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, 999 1019 struct hns_roce_buf_attr *buf_attr, 1000 - unsigned int page_shift, struct ib_udata *udata, 1020 + unsigned int ba_page_shift, struct ib_udata *udata, 1001 1021 unsigned long user_addr) 1002 1022 { 1023 + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; 1003 1024 struct ib_device *ibdev = &hr_dev->ib_dev; 1025 + unsigned int buf_page_shift = 0; 1004 1026 dma_addr_t *pages = NULL; 1005 - int region_cnt = 0; 1006 1027 int all_pg_cnt; 1007 1028 int get_pg_cnt; 1008 - bool has_mtt; 1009 - int err = 0; 1029 + int ret = 0; 1010 1030 1011 - has_mtt = mtr_has_mtt(buf_attr); 1031 + /* if disable mtt, all pages must in a continuous address range */ 1032 + cfg->is_direct = !mtr_has_mtt(buf_attr); 1033 + 1012 1034 /* if buffer only need mtt, just init the hem cfg */ 1013 1035 if (buf_attr->mtt_only) { 1014 - mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; 1015 - mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> 1016 - buf_attr->page_shift; 1036 + cfg->buf_pg_shift = buf_attr->page_shift; 1037 + cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> 1038 + buf_attr->page_shift; 1017 1039 mtr->umem = NULL; 1018 1040 mtr->kmem = NULL; 1019 1041 } else { 1020 - err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, 1021 - user_addr); 1022 - if (err) { 1023 - ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", 1024 - err); 1025 - return err; 1042 + ret = mtr_alloc_bufs(hr_dev, mtr, 
buf_attr, cfg->is_direct, 1043 + udata, user_addr); 1044 + if (ret) { 1045 + ibdev_err(ibdev, 1046 + "failed to alloc mtr bufs, ret = %d.\n", ret); 1047 + return ret; 1026 1048 } 1027 1049 } 1028 1050 1029 - /* alloc mtt memory */ 1030 - all_pg_cnt = mtr->hem_cfg.buf_pg_count; 1031 - hns_roce_hem_list_init(&mtr->hem_list); 1032 - mtr->hem_cfg.is_direct = !has_mtt; 1033 - mtr->hem_cfg.ba_pg_shift = page_shift; 1034 - mtr->hem_cfg.region_count = 0; 1035 - region_cnt = mtr_init_region(buf_attr, all_pg_cnt, 1036 - mtr->hem_cfg.region, 1037 - ARRAY_SIZE(mtr->hem_cfg.region), 1038 - mtr->hem_cfg.buf_pg_shift); 1039 - if (region_cnt < 1) { 1040 - err = -ENOBUFS; 1041 - ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); 1051 + all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); 1052 + if (all_pg_cnt < 1) { 1053 + ret = -ENOBUFS; 1054 + ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); 1042 1055 goto err_alloc_bufs; 1043 1056 } 1044 1057 1045 - mtr->hem_cfg.region_count = region_cnt; 1046 - 1047 - if (has_mtt) { 1048 - err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, 1049 - mtr->hem_cfg.region, region_cnt, 1050 - page_shift); 1051 - if (err) { 1052 - ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", 1053 - err); 1058 + hns_roce_hem_list_init(&mtr->hem_list); 1059 + if (!cfg->is_direct) { 1060 + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, 1061 + cfg->region, cfg->region_count, 1062 + ba_page_shift); 1063 + if (ret) { 1064 + ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", 1065 + ret); 1054 1066 goto err_alloc_bufs; 1055 1067 } 1056 - mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; 1068 + cfg->root_ba = mtr->hem_list.root_ba; 1069 + cfg->ba_pg_shift = ba_page_shift; 1070 + } else { 1071 + cfg->ba_pg_shift = cfg->buf_pg_shift; 1057 1072 } 1058 1073 1059 1074 /* no buffer to map */ ··· 1095 1046 /* alloc a tmp array to store buffer's dma address */ 1096 1047 pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), 
GFP_KERNEL); 1097 1048 if (!pages) { 1098 - err = -ENOMEM; 1099 - ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", 1049 + ret = -ENOMEM; 1050 + ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", 1100 1051 all_pg_cnt); 1101 1052 goto err_alloc_hem_list; 1102 1053 } 1103 1054 1104 1055 get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, 1105 - mtr->hem_cfg.buf_pg_shift); 1056 + buf_page_shift); 1106 1057 if (get_pg_cnt != all_pg_cnt) { 1107 - ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", 1058 + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", 1108 1059 get_pg_cnt, all_pg_cnt); 1109 - err = -ENOBUFS; 1060 + ret = -ENOBUFS; 1110 1061 goto err_alloc_page_list; 1111 1062 } 1112 1063 1113 - if (!has_mtt) { 1114 - mtr->hem_cfg.root_ba = pages[0]; 1115 - } else { 1116 - /* write buffer's dma address to BA table */ 1117 - err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); 1118 - if (err) { 1119 - ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", 1120 - err); 1121 - goto err_alloc_page_list; 1122 - } 1064 + /* write buffer's dma address to BA table */ 1065 + ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); 1066 + if (ret) { 1067 + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); 1068 + goto err_alloc_page_list; 1123 1069 } 1124 1070 1125 1071 /* drop tmp array */ ··· 1126 1082 hns_roce_hem_list_release(hr_dev, &mtr->hem_list); 1127 1083 err_alloc_bufs: 1128 1084 mtr_free_bufs(hr_dev, mtr); 1129 - return err; 1085 + return ret; 1130 1086 } 1131 1087 1132 1088 void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
-10
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 411 411 struct hns_roce_qp *hr_qp, 412 412 struct ib_qp_cap *cap) 413 413 { 414 - struct ib_device *ibdev = &hr_dev->ib_dev; 415 414 u32 cnt; 416 415 417 416 cnt = max(1U, cap->max_send_sge); ··· 430 431 } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { 431 432 cnt = roundup_pow_of_two(sq_wqe_cnt * 432 433 (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); 433 - 434 - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { 435 - if (cnt > hr_dev->caps.max_extend_sg) { 436 - ibdev_err(ibdev, 437 - "failed to check exSGE num, exSGE num = %d.\n", 438 - cnt); 439 - return -EINVAL; 440 - } 441 - } 442 434 } else { 443 435 cnt = 0; 444 436 }
+2 -12
drivers/infiniband/hw/hns/hns_roce_restrack.c
··· 76 76 return -EMSGSIZE; 77 77 } 78 78 79 - static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, 80 - struct rdma_restrack_entry *res) 79 + int hns_roce_fill_res_cq_entry(struct sk_buff *msg, 80 + struct ib_cq *ib_cq) 81 81 { 82 - struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); 83 82 struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); 84 83 struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); 85 84 struct hns_roce_v2_cq_context *context; ··· 117 118 err: 118 119 kfree(context); 119 120 return ret; 120 - } 121 - 122 - int hns_roce_fill_res_entry(struct sk_buff *msg, 123 - struct rdma_restrack_entry *res) 124 - { 125 - if (res->type == RDMA_RESTRACK_CQ) 126 - return hns_roce_fill_res_cq_entry(msg, res); 127 - 128 - return 0; 129 121 }
+1 -21
drivers/infiniband/hw/i40iw/i40iw_verbs.c
··· 101 101 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | 102 102 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; 103 103 props->gid_tbl_len = 1; 104 - props->pkey_tbl_len = 1; 105 104 props->active_width = IB_WIDTH_4X; 106 105 props->active_speed = 1; 107 106 props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; ··· 1542 1543 * @pd: ibpd pointer 1543 1544 * @mr_type: memory for stag registrion 1544 1545 * @max_num_sg: man number of pages 1545 - * @udata: user data or NULL for kernel objects 1546 1546 */ 1547 1547 static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1548 - u32 max_num_sg, struct ib_udata *udata) 1548 + u32 max_num_sg) 1549 1549 { 1550 1550 struct i40iw_pd *iwpd = to_iwpd(pd); 1551 1551 struct i40iw_device *iwdev = to_iwdev(pd->device); ··· 2458 2460 if (err) 2459 2461 return err; 2460 2462 2461 - immutable->pkey_tbl_len = attr.pkey_tbl_len; 2462 2463 immutable->gid_tbl_len = attr.gid_tbl_len; 2463 2464 2464 2465 return 0; ··· 2613 2616 return 0; 2614 2617 } 2615 2618 2616 - /** 2617 - * i40iw_query_pkey - Query partition key 2618 - * @ibdev: device pointer from stack 2619 - * @port: port number 2620 - * @index: index of pkey 2621 - * @pkey: pointer to store the pkey 2622 - */ 2623 - static int i40iw_query_pkey(struct ib_device *ibdev, 2624 - u8 port, 2625 - u16 index, 2626 - u16 *pkey) 2627 - { 2628 - *pkey = 0; 2629 - return 0; 2630 - } 2631 - 2632 2619 static const struct ib_device_ops i40iw_dev_ops = { 2633 2620 .owner = THIS_MODULE, 2634 2621 .driver_id = RDMA_DRIVER_I40IW, ··· 2652 2671 .post_send = i40iw_post_send, 2653 2672 .query_device = i40iw_query_device, 2654 2673 .query_gid = i40iw_query_gid, 2655 - .query_pkey = i40iw_query_pkey, 2656 2674 .query_port = i40iw_query_port, 2657 2675 .query_qp = i40iw_query_qp, 2658 2676 .reg_user_mr = i40iw_reg_user_mr,
+15 -22
drivers/infiniband/hw/mlx4/main.c
··· 1219 1219 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); 1220 1220 } 1221 1221 1222 - static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, 1223 - struct ib_udata *udata) 1222 + static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) 1224 1223 { 1225 - struct mlx4_ib_xrcd *xrcd; 1224 + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); 1225 + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); 1226 1226 struct ib_cq_init_attr cq_attr = {}; 1227 1227 int err; 1228 1228 1229 - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) 1230 - return ERR_PTR(-ENOSYS); 1229 + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) 1230 + return -EOPNOTSUPP; 1231 1231 1232 - xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); 1233 - if (!xrcd) 1234 - return ERR_PTR(-ENOMEM); 1235 - 1236 - err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); 1232 + err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn); 1237 1233 if (err) 1238 - goto err1; 1234 + return err; 1239 1235 1240 - xrcd->pd = ib_alloc_pd(ibdev, 0); 1236 + xrcd->pd = ib_alloc_pd(ibxrcd->device, 0); 1241 1237 if (IS_ERR(xrcd->pd)) { 1242 1238 err = PTR_ERR(xrcd->pd); 1243 1239 goto err2; 1244 1240 } 1245 1241 1246 1242 cq_attr.cqe = 1; 1247 - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); 1243 + xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr); 1248 1244 if (IS_ERR(xrcd->cq)) { 1249 1245 err = PTR_ERR(xrcd->cq); 1250 1246 goto err3; 1251 1247 } 1252 1248 1253 - return &xrcd->ibxrcd; 1249 + return 0; 1254 1250 1255 1251 err3: 1256 1252 ib_dealloc_pd(xrcd->pd); 1257 1253 err2: 1258 - mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); 1259 - err1: 1260 - kfree(xrcd); 1261 - return ERR_PTR(err); 1254 + mlx4_xrcd_free(dev->dev, xrcd->xrcdn); 1255 + return err; 1262 1256 } 1263 1257 1264 - static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 1258 + static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 1265 
1259 { 1266 1260 ib_destroy_cq(to_mxrcd(xrcd)->cq); 1267 1261 ib_dealloc_pd(to_mxrcd(xrcd)->pd); 1268 1262 mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); 1269 - kfree(xrcd); 1270 - 1271 - return 0; 1272 1263 } 1273 1264 1274 1265 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) ··· 2598 2607 static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { 2599 2608 .alloc_xrcd = mlx4_ib_alloc_xrcd, 2600 2609 .dealloc_xrcd = mlx4_ib_dealloc_xrcd, 2610 + 2611 + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd), 2601 2612 }; 2602 2613 2603 2614 static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
+1 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 729 729 struct ib_udata *udata); 730 730 int mlx4_ib_dealloc_mw(struct ib_mw *mw); 731 731 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 732 - u32 max_num_sg, struct ib_udata *udata); 732 + u32 max_num_sg); 733 733 int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 734 734 unsigned int *sg_offset); 735 735 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+1 -2
drivers/infiniband/hw/mlx4/mr.c
··· 439 439 440 440 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; 441 441 mr->ibmr.length = length; 442 - mr->ibmr.iova = virt_addr; 443 442 mr->ibmr.page_size = 1U << shift; 444 443 445 444 return &mr->ibmr; ··· 654 655 } 655 656 656 657 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 657 - u32 max_num_sg, struct ib_udata *udata) 658 + u32 max_num_sg) 658 659 { 659 660 struct mlx4_ib_dev *dev = to_mdev(pd->device); 660 661 struct mlx4_ib_mr *mr;
+4 -2
drivers/infiniband/hw/mlx5/Makefile
··· 4 4 mlx5_ib-y := ah.o \ 5 5 cmd.o \ 6 6 cong.o \ 7 + counters.o \ 7 8 cq.o \ 8 9 doorbell.o \ 9 10 gsi.o \ ··· 23 22 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o 24 23 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o 25 24 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ 26 - flow.o \ 27 - qos.o 25 + fs.o \ 26 + qos.o \ 27 + std_types.o
-12
drivers/infiniband/hw/mlx5/cmd.c
··· 148 148 spin_unlock(&dm->lock); 149 149 } 150 150 151 - int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) 152 - { 153 - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; 154 - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); 155 - 156 - MLX5_SET(ppcnt_reg, in, local_port, 1); 157 - 158 - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); 159 - return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, 160 - 0, 0); 161 - } 162 - 163 151 void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) 164 152 { 165 153 u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
-1
drivers/infiniband/hw/mlx5/cmd.h
··· 41 41 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); 42 42 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 43 43 void *out); 44 - int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); 45 44 int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, 46 45 u64 length, u32 alignment); 47 46 void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
+709
drivers/infiniband/hw/mlx5/counters.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #include "mlx5_ib.h" 7 + #include <linux/mlx5/eswitch.h> 8 + #include "counters.h" 9 + #include "ib_rep.h" 10 + #include "qp.h" 11 + 12 + struct mlx5_ib_counter { 13 + const char *name; 14 + size_t offset; 15 + }; 16 + 17 + #define INIT_Q_COUNTER(_name) \ 18 + { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} 19 + 20 + static const struct mlx5_ib_counter basic_q_cnts[] = { 21 + INIT_Q_COUNTER(rx_write_requests), 22 + INIT_Q_COUNTER(rx_read_requests), 23 + INIT_Q_COUNTER(rx_atomic_requests), 24 + INIT_Q_COUNTER(out_of_buffer), 25 + }; 26 + 27 + static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { 28 + INIT_Q_COUNTER(out_of_sequence), 29 + }; 30 + 31 + static const struct mlx5_ib_counter retrans_q_cnts[] = { 32 + INIT_Q_COUNTER(duplicate_request), 33 + INIT_Q_COUNTER(rnr_nak_retry_err), 34 + INIT_Q_COUNTER(packet_seq_err), 35 + INIT_Q_COUNTER(implied_nak_seq_err), 36 + INIT_Q_COUNTER(local_ack_timeout_err), 37 + }; 38 + 39 + #define INIT_CONG_COUNTER(_name) \ 40 + { .name = #_name, .offset = \ 41 + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} 42 + 43 + static const struct mlx5_ib_counter cong_cnts[] = { 44 + INIT_CONG_COUNTER(rp_cnp_ignored), 45 + INIT_CONG_COUNTER(rp_cnp_handled), 46 + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), 47 + INIT_CONG_COUNTER(np_cnp_sent), 48 + }; 49 + 50 + static const struct mlx5_ib_counter extended_err_cnts[] = { 51 + INIT_Q_COUNTER(resp_local_length_error), 52 + INIT_Q_COUNTER(resp_cqe_error), 53 + INIT_Q_COUNTER(req_cqe_error), 54 + INIT_Q_COUNTER(req_remote_invalid_request), 55 + INIT_Q_COUNTER(req_remote_access_errors), 56 + INIT_Q_COUNTER(resp_remote_access_errors), 57 + INIT_Q_COUNTER(resp_cqe_flush_error), 58 + INIT_Q_COUNTER(req_cqe_flush_error), 59 + }; 60 + 61 + static const struct mlx5_ib_counter roce_accl_cnts[] = { 62 + 
INIT_Q_COUNTER(roce_adp_retrans), 63 + INIT_Q_COUNTER(roce_adp_retrans_to), 64 + INIT_Q_COUNTER(roce_slow_restart), 65 + INIT_Q_COUNTER(roce_slow_restart_cnps), 66 + INIT_Q_COUNTER(roce_slow_restart_trans), 67 + }; 68 + 69 + #define INIT_EXT_PPCNT_COUNTER(_name) \ 70 + { .name = #_name, .offset = \ 71 + MLX5_BYTE_OFF(ppcnt_reg, \ 72 + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} 73 + 74 + static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { 75 + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), 76 + }; 77 + 78 + static int mlx5_ib_read_counters(struct ib_counters *counters, 79 + struct ib_counters_read_attr *read_attr, 80 + struct uverbs_attr_bundle *attrs) 81 + { 82 + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 83 + struct mlx5_read_counters_attr mread_attr = {}; 84 + struct mlx5_ib_flow_counters_desc *desc; 85 + int ret, i; 86 + 87 + mutex_lock(&mcounters->mcntrs_mutex); 88 + if (mcounters->cntrs_max_index > read_attr->ncounters) { 89 + ret = -EINVAL; 90 + goto err_bound; 91 + } 92 + 93 + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), 94 + GFP_KERNEL); 95 + if (!mread_attr.out) { 96 + ret = -ENOMEM; 97 + goto err_bound; 98 + } 99 + 100 + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; 101 + mread_attr.flags = read_attr->flags; 102 + ret = mcounters->read_counters(counters->device, &mread_attr); 103 + if (ret) 104 + goto err_read; 105 + 106 + /* do the pass over the counters data array to assign according to the 107 + * descriptions and indexing pairs 108 + */ 109 + desc = mcounters->counters_data; 110 + for (i = 0; i < mcounters->ncounters; i++) 111 + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; 112 + 113 + err_read: 114 + kfree(mread_attr.out); 115 + err_bound: 116 + mutex_unlock(&mcounters->mcntrs_mutex); 117 + return ret; 118 + } 119 + 120 + static void mlx5_ib_destroy_counters(struct ib_counters *counters) 121 + { 122 + struct mlx5_ib_mcounters *mcounters = 
to_mcounters(counters); 123 + 124 + mlx5_ib_counters_clear_description(counters); 125 + if (mcounters->hw_cntrs_hndl) 126 + mlx5_fc_destroy(to_mdev(counters->device)->mdev, 127 + mcounters->hw_cntrs_hndl); 128 + } 129 + 130 + static int mlx5_ib_create_counters(struct ib_counters *counters, 131 + struct uverbs_attr_bundle *attrs) 132 + { 133 + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 134 + 135 + mutex_init(&mcounters->mcntrs_mutex); 136 + return 0; 137 + } 138 + 139 + 140 + static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) 141 + { 142 + return MLX5_ESWITCH_MANAGER(mdev) && 143 + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == 144 + MLX5_ESWITCH_OFFLOADS; 145 + } 146 + 147 + static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, 148 + u8 port_num) 149 + { 150 + return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : 151 + &dev->port[port_num].cnts; 152 + } 153 + 154 + /** 155 + * mlx5_ib_get_counters_id - Returns counters id to use for device+port 156 + * @dev: Pointer to mlx5 IB device 157 + * @port_num: Zero based port number 158 + * 159 + * mlx5_ib_get_counters_id() Returns counters set id to use for given 160 + * device port combination in switchdev and non switchdev mode of the 161 + * parent device. 
162 + */ 163 + u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) 164 + { 165 + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); 166 + 167 + return cnts->set_id; 168 + } 169 + 170 + static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 171 + u8 port_num) 172 + { 173 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 174 + const struct mlx5_ib_counters *cnts; 175 + bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); 176 + 177 + if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) 178 + return NULL; 179 + 180 + cnts = get_counters(dev, port_num - 1); 181 + 182 + return rdma_alloc_hw_stats_struct(cnts->names, 183 + cnts->num_q_counters + 184 + cnts->num_cong_counters + 185 + cnts->num_ext_ppcnt_counters, 186 + RDMA_HW_STATS_DEFAULT_LIFESPAN); 187 + } 188 + 189 + static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, 190 + const struct mlx5_ib_counters *cnts, 191 + struct rdma_hw_stats *stats, 192 + u16 set_id) 193 + { 194 + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; 195 + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; 196 + __be32 val; 197 + int ret, i; 198 + 199 + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); 200 + MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); 201 + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); 202 + if (ret) 203 + return ret; 204 + 205 + for (i = 0; i < cnts->num_q_counters; i++) { 206 + val = *(__be32 *)((void *)out + cnts->offsets[i]); 207 + stats->value[i] = (u64)be32_to_cpu(val); 208 + } 209 + 210 + return 0; 211 + } 212 + 213 + static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, 214 + const struct mlx5_ib_counters *cnts, 215 + struct rdma_hw_stats *stats) 216 + { 217 + int offset = cnts->num_q_counters + cnts->num_cong_counters; 218 + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; 219 + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); 220 + int ret, i; 221 + void *out; 222 + 223 + out = kvzalloc(sz, GFP_KERNEL); 224 + 
if (!out) 225 + return -ENOMEM; 226 + 227 + MLX5_SET(ppcnt_reg, in, local_port, 1); 228 + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); 229 + ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT, 230 + 0, 0); 231 + if (ret) 232 + goto free; 233 + 234 + for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) 235 + stats->value[i + offset] = 236 + be64_to_cpup((__be64 *)(out + 237 + cnts->offsets[i + offset])); 238 + free: 239 + kvfree(out); 240 + return ret; 241 + } 242 + 243 + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 244 + struct rdma_hw_stats *stats, 245 + u8 port_num, int index) 246 + { 247 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 248 + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); 249 + struct mlx5_core_dev *mdev; 250 + int ret, num_counters; 251 + u8 mdev_port_num; 252 + 253 + if (!stats) 254 + return -EINVAL; 255 + 256 + num_counters = cnts->num_q_counters + 257 + cnts->num_cong_counters + 258 + cnts->num_ext_ppcnt_counters; 259 + 260 + /* q_counters are per IB device, query the master mdev */ 261 + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); 262 + if (ret) 263 + return ret; 264 + 265 + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 266 + ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); 267 + if (ret) 268 + return ret; 269 + } 270 + 271 + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 272 + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, 273 + &mdev_port_num); 274 + if (!mdev) { 275 + /* If port is not affiliated yet, its in down state 276 + * which doesn't have any counters yet, so it would be 277 + * zero. So no need to read from the HCA. 
278 + */ 279 + goto done; 280 + } 281 + ret = mlx5_lag_query_cong_counters(dev->mdev, 282 + stats->value + 283 + cnts->num_q_counters, 284 + cnts->num_cong_counters, 285 + cnts->offsets + 286 + cnts->num_q_counters); 287 + 288 + mlx5_ib_put_native_port_mdev(dev, port_num); 289 + if (ret) 290 + return ret; 291 + } 292 + 293 + done: 294 + return num_counters; 295 + } 296 + 297 + static struct rdma_hw_stats * 298 + mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) 299 + { 300 + struct mlx5_ib_dev *dev = to_mdev(counter->device); 301 + const struct mlx5_ib_counters *cnts = 302 + get_counters(dev, counter->port - 1); 303 + 304 + return rdma_alloc_hw_stats_struct(cnts->names, 305 + cnts->num_q_counters + 306 + cnts->num_cong_counters + 307 + cnts->num_ext_ppcnt_counters, 308 + RDMA_HW_STATS_DEFAULT_LIFESPAN); 309 + } 310 + 311 + static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) 312 + { 313 + struct mlx5_ib_dev *dev = to_mdev(counter->device); 314 + const struct mlx5_ib_counters *cnts = 315 + get_counters(dev, counter->port - 1); 316 + 317 + return mlx5_ib_query_q_counters(dev->mdev, cnts, 318 + counter->stats, counter->id); 319 + } 320 + 321 + static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) 322 + { 323 + struct mlx5_ib_dev *dev = to_mdev(counter->device); 324 + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 325 + 326 + if (!counter->id) 327 + return 0; 328 + 329 + MLX5_SET(dealloc_q_counter_in, in, opcode, 330 + MLX5_CMD_OP_DEALLOC_Q_COUNTER); 331 + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); 332 + return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 333 + } 334 + 335 + static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, 336 + struct ib_qp *qp) 337 + { 338 + struct mlx5_ib_dev *dev = to_mdev(qp->device); 339 + int err; 340 + 341 + if (!counter->id) { 342 + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 343 + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 344 + 345 + 
MLX5_SET(alloc_q_counter_in, in, opcode, 346 + MLX5_CMD_OP_ALLOC_Q_COUNTER); 347 + MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); 348 + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 349 + if (err) 350 + return err; 351 + counter->id = 352 + MLX5_GET(alloc_q_counter_out, out, counter_set_id); 353 + } 354 + 355 + err = mlx5_ib_qp_set_counter(qp, counter); 356 + if (err) 357 + goto fail_set_counter; 358 + 359 + return 0; 360 + 361 + fail_set_counter: 362 + mlx5_ib_counter_dealloc(counter); 363 + counter->id = 0; 364 + 365 + return err; 366 + } 367 + 368 + static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) 369 + { 370 + return mlx5_ib_qp_set_counter(qp, NULL); 371 + } 372 + 373 + 374 + static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, 375 + const char **names, 376 + size_t *offsets) 377 + { 378 + int i; 379 + int j = 0; 380 + 381 + for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { 382 + names[j] = basic_q_cnts[i].name; 383 + offsets[j] = basic_q_cnts[i].offset; 384 + } 385 + 386 + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { 387 + for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { 388 + names[j] = out_of_seq_q_cnts[i].name; 389 + offsets[j] = out_of_seq_q_cnts[i].offset; 390 + } 391 + } 392 + 393 + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 394 + for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { 395 + names[j] = retrans_q_cnts[i].name; 396 + offsets[j] = retrans_q_cnts[i].offset; 397 + } 398 + } 399 + 400 + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { 401 + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { 402 + names[j] = extended_err_cnts[i].name; 403 + offsets[j] = extended_err_cnts[i].offset; 404 + } 405 + } 406 + 407 + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { 408 + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { 409 + names[j] = roce_accl_cnts[i].name; 410 + offsets[j] = roce_accl_cnts[i].offset; 411 + } 412 + } 413 + 414 + if (MLX5_CAP_GEN(dev->mdev, 
cc_query_allowed)) { 415 + for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { 416 + names[j] = cong_cnts[i].name; 417 + offsets[j] = cong_cnts[i].offset; 418 + } 419 + } 420 + 421 + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 422 + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { 423 + names[j] = ext_ppcnt_cnts[i].name; 424 + offsets[j] = ext_ppcnt_cnts[i].offset; 425 + } 426 + } 427 + } 428 + 429 + 430 + static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, 431 + struct mlx5_ib_counters *cnts) 432 + { 433 + u32 num_counters; 434 + 435 + num_counters = ARRAY_SIZE(basic_q_cnts); 436 + 437 + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) 438 + num_counters += ARRAY_SIZE(out_of_seq_q_cnts); 439 + 440 + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) 441 + num_counters += ARRAY_SIZE(retrans_q_cnts); 442 + 443 + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) 444 + num_counters += ARRAY_SIZE(extended_err_cnts); 445 + 446 + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) 447 + num_counters += ARRAY_SIZE(roce_accl_cnts); 448 + 449 + cnts->num_q_counters = num_counters; 450 + 451 + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 452 + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); 453 + num_counters += ARRAY_SIZE(cong_cnts); 454 + } 455 + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 456 + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); 457 + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); 458 + } 459 + cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); 460 + if (!cnts->names) 461 + return -ENOMEM; 462 + 463 + cnts->offsets = kcalloc(num_counters, 464 + sizeof(cnts->offsets), GFP_KERNEL); 465 + if (!cnts->offsets) 466 + goto err_names; 467 + 468 + return 0; 469 + 470 + err_names: 471 + kfree(cnts->names); 472 + cnts->names = NULL; 473 + return -ENOMEM; 474 + } 475 + 476 + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 477 + { 478 + u32 
in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 479 + int num_cnt_ports; 480 + int i; 481 + 482 + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; 483 + 484 + MLX5_SET(dealloc_q_counter_in, in, opcode, 485 + MLX5_CMD_OP_DEALLOC_Q_COUNTER); 486 + 487 + for (i = 0; i < num_cnt_ports; i++) { 488 + if (dev->port[i].cnts.set_id) { 489 + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 490 + dev->port[i].cnts.set_id); 491 + mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 492 + } 493 + kfree(dev->port[i].cnts.names); 494 + kfree(dev->port[i].cnts.offsets); 495 + } 496 + } 497 + 498 + static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 499 + { 500 + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 501 + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 502 + int num_cnt_ports; 503 + int err = 0; 504 + int i; 505 + bool is_shared; 506 + 507 + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); 508 + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; 509 + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; 510 + 511 + for (i = 0; i < num_cnt_ports; i++) { 512 + err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); 513 + if (err) 514 + goto err_alloc; 515 + 516 + mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, 517 + dev->port[i].cnts.offsets); 518 + 519 + MLX5_SET(alloc_q_counter_in, in, uid, 520 + is_shared ? 
MLX5_SHARED_RESOURCE_UID : 0); 521 + 522 + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 523 + if (err) { 524 + mlx5_ib_warn(dev, 525 + "couldn't allocate queue counter for port %d, err %d\n", 526 + i + 1, err); 527 + goto err_alloc; 528 + } 529 + 530 + dev->port[i].cnts.set_id = 531 + MLX5_GET(alloc_q_counter_out, out, counter_set_id); 532 + } 533 + return 0; 534 + 535 + err_alloc: 536 + mlx5_ib_dealloc_counters(dev); 537 + return err; 538 + } 539 + 540 + static int read_flow_counters(struct ib_device *ibdev, 541 + struct mlx5_read_counters_attr *read_attr) 542 + { 543 + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; 544 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 545 + 546 + return mlx5_fc_query(dev->mdev, fc, 547 + &read_attr->out[IB_COUNTER_PACKETS], 548 + &read_attr->out[IB_COUNTER_BYTES]); 549 + } 550 + 551 + /* flow counters currently expose two counters packets and bytes */ 552 + #define FLOW_COUNTERS_NUM 2 553 + static int counters_set_description( 554 + struct ib_counters *counters, enum mlx5_ib_counters_type counters_type, 555 + struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters) 556 + { 557 + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 558 + u32 cntrs_max_index = 0; 559 + int i; 560 + 561 + if (counters_type != MLX5_IB_COUNTERS_FLOW) 562 + return -EINVAL; 563 + 564 + /* init the fields for the object */ 565 + mcounters->type = counters_type; 566 + mcounters->read_counters = read_flow_counters; 567 + mcounters->counters_num = FLOW_COUNTERS_NUM; 568 + mcounters->ncounters = ncounters; 569 + /* each counter entry have both description and index pair */ 570 + for (i = 0; i < ncounters; i++) { 571 + if (desc_data[i].description > IB_COUNTER_BYTES) 572 + return -EINVAL; 573 + 574 + if (cntrs_max_index <= desc_data[i].index) 575 + cntrs_max_index = desc_data[i].index + 1; 576 + } 577 + 578 + mutex_lock(&mcounters->mcntrs_mutex); 579 + mcounters->counters_data = desc_data; 580 + mcounters->cntrs_max_index = 
cntrs_max_index; 581 + mutex_unlock(&mcounters->mcntrs_mutex); 582 + 583 + return 0; 584 + } 585 + 586 + #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) 587 + int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, 588 + struct mlx5_ib_create_flow *ucmd) 589 + { 590 + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); 591 + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; 592 + struct mlx5_ib_flow_counters_desc *desc_data = NULL; 593 + bool hw_hndl = false; 594 + int ret = 0; 595 + 596 + if (ucmd && ucmd->ncounters_data != 0) { 597 + cntrs_data = ucmd->data; 598 + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) 599 + return -EINVAL; 600 + 601 + desc_data = kcalloc(cntrs_data->ncounters, 602 + sizeof(*desc_data), 603 + GFP_KERNEL); 604 + if (!desc_data) 605 + return -ENOMEM; 606 + 607 + if (copy_from_user(desc_data, 608 + u64_to_user_ptr(cntrs_data->counters_data), 609 + sizeof(*desc_data) * cntrs_data->ncounters)) { 610 + ret = -EFAULT; 611 + goto free; 612 + } 613 + } 614 + 615 + if (!mcounters->hw_cntrs_hndl) { 616 + mcounters->hw_cntrs_hndl = mlx5_fc_create( 617 + to_mdev(ibcounters->device)->mdev, false); 618 + if (IS_ERR(mcounters->hw_cntrs_hndl)) { 619 + ret = PTR_ERR(mcounters->hw_cntrs_hndl); 620 + goto free; 621 + } 622 + hw_hndl = true; 623 + } 624 + 625 + if (desc_data) { 626 + /* counters already bound to at least one flow */ 627 + if (mcounters->cntrs_max_index) { 628 + ret = -EINVAL; 629 + goto free_hndl; 630 + } 631 + 632 + ret = counters_set_description(ibcounters, 633 + MLX5_IB_COUNTERS_FLOW, 634 + desc_data, 635 + cntrs_data->ncounters); 636 + if (ret) 637 + goto free_hndl; 638 + 639 + } else if (!mcounters->cntrs_max_index) { 640 + /* counters not bound yet, must have udata passed */ 641 + ret = -EINVAL; 642 + goto free_hndl; 643 + } 644 + 645 + return 0; 646 + 647 + free_hndl: 648 + if (hw_hndl) { 649 + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, 650 + mcounters->hw_cntrs_hndl); 651 + 
mcounters->hw_cntrs_hndl = NULL; 652 + } 653 + free: 654 + kfree(desc_data); 655 + return ret; 656 + } 657 + 658 + void mlx5_ib_counters_clear_description(struct ib_counters *counters) 659 + { 660 + struct mlx5_ib_mcounters *mcounters; 661 + 662 + if (!counters || atomic_read(&counters->usecnt) != 1) 663 + return; 664 + 665 + mcounters = to_mcounters(counters); 666 + 667 + mutex_lock(&mcounters->mcntrs_mutex); 668 + kfree(mcounters->counters_data); 669 + mcounters->counters_data = NULL; 670 + mcounters->cntrs_max_index = 0; 671 + mutex_unlock(&mcounters->mcntrs_mutex); 672 + } 673 + 674 + static const struct ib_device_ops hw_stats_ops = { 675 + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, 676 + .get_hw_stats = mlx5_ib_get_hw_stats, 677 + .counter_bind_qp = mlx5_ib_counter_bind_qp, 678 + .counter_unbind_qp = mlx5_ib_counter_unbind_qp, 679 + .counter_dealloc = mlx5_ib_counter_dealloc, 680 + .counter_alloc_stats = mlx5_ib_counter_alloc_stats, 681 + .counter_update_stats = mlx5_ib_counter_update_stats, 682 + }; 683 + 684 + static const struct ib_device_ops counters_ops = { 685 + .create_counters = mlx5_ib_create_counters, 686 + .destroy_counters = mlx5_ib_destroy_counters, 687 + .read_counters = mlx5_ib_read_counters, 688 + 689 + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), 690 + }; 691 + 692 + int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) 693 + { 694 + ib_set_device_ops(&dev->ib_dev, &counters_ops); 695 + 696 + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 697 + return 0; 698 + 699 + ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); 700 + return mlx5_ib_alloc_counters(dev); 701 + } 702 + 703 + void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev) 704 + { 705 + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 706 + return; 707 + 708 + mlx5_ib_dealloc_counters(dev); 709 + }
+17
drivers/infiniband/hw/mlx5/counters.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* 3 + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #ifndef _MLX5_IB_COUNTERS_H 7 + #define _MLX5_IB_COUNTERS_H 8 + 9 + #include "mlx5_ib.h" 10 + 11 + int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); 12 + void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); 13 + void mlx5_ib_counters_clear_description(struct ib_counters *counters); 14 + int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, 15 + struct mlx5_ib_create_flow *ucmd); 16 + u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); 17 + #endif /* _MLX5_IB_COUNTERS_H */
+28 -74
drivers/infiniband/hw/mlx5/devx.c
··· 14 14 #include <linux/mlx5/driver.h> 15 15 #include <linux/mlx5/fs.h> 16 16 #include "mlx5_ib.h" 17 + #include "devx.h" 17 18 #include "qp.h" 18 19 #include <linux/xarray.h> 19 20 ··· 90 89 u8 is_destroyed:1; 91 90 }; 92 91 93 - #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) 94 - struct devx_obj { 95 - struct mlx5_ib_dev *ib_dev; 96 - u64 obj_id; 97 - u32 dinlen; /* destroy inbox length */ 98 - u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; 99 - u32 flags; 100 - union { 101 - struct mlx5_ib_devx_mr devx_mr; 102 - struct mlx5_core_dct core_dct; 103 - struct mlx5_core_cq core_cq; 104 - u32 flow_counter_bulk_size; 105 - }; 106 - struct list_head event_sub; /* holds devx_event_subscription entries */ 107 - }; 108 - 109 92 struct devx_umem { 110 93 struct mlx5_core_dev *mdev; 111 94 struct ib_umem *umem; ··· 154 169 MLX5_SET(destroy_uctx_in, in, uid, uid); 155 170 156 171 mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); 157 - } 158 - 159 - bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) 160 - { 161 - struct devx_obj *devx_obj = obj; 162 - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); 163 - 164 - switch (opcode) { 165 - case MLX5_CMD_OP_DESTROY_TIR: 166 - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; 167 - *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, 168 - obj_id); 169 - return true; 170 - 171 - case MLX5_CMD_OP_DESTROY_FLOW_TABLE: 172 - *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 173 - *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, 174 - table_id); 175 - return true; 176 - default: 177 - return false; 178 - } 179 - } 180 - 181 - bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) 182 - { 183 - struct devx_obj *devx_obj = obj; 184 - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); 185 - 186 - if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { 187 - 188 - if (offset && offset >= 
devx_obj->flow_counter_bulk_size) 189 - return false; 190 - 191 - *counter_id = MLX5_GET(dealloc_flow_counter_in, 192 - devx_obj->dinbox, 193 - flow_counter_id); 194 - *counter_id += offset; 195 - return true; 196 - } 197 - 198 - return false; 199 172 } 200 173 201 174 static bool is_legacy_unaffiliated_event_num(u16 event_num) ··· 2362 2419 return NOTIFY_OK; 2363 2420 } 2364 2421 2365 - void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) 2422 + int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) 2366 2423 { 2367 2424 struct mlx5_devx_event_table *table = &dev->devx_event_table; 2425 + int uid; 2368 2426 2369 - xa_init(&table->event_xa); 2370 - mutex_init(&table->event_xa_lock); 2371 - MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); 2372 - mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); 2427 + uid = mlx5_ib_devx_create(dev, false); 2428 + if (uid > 0) { 2429 + dev->devx_whitelist_uid = uid; 2430 + xa_init(&table->event_xa); 2431 + mutex_init(&table->event_xa_lock); 2432 + MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); 2433 + mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); 2434 + } 2435 + 2436 + return 0; 2373 2437 } 2374 2438 2375 - void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) 2439 + void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) 2376 2440 { 2377 2441 struct mlx5_devx_event_table *table = &dev->devx_event_table; 2378 2442 struct devx_event_subscription *sub, *tmp; ··· 2387 2437 void *entry; 2388 2438 unsigned long id; 2389 2439 2390 - mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); 2391 - mutex_lock(&dev->devx_event_table.event_xa_lock); 2392 - xa_for_each(&table->event_xa, id, entry) { 2393 - event = entry; 2394 - list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list, 2395 - xa_list) 2396 - devx_cleanup_subscription(dev, sub); 2397 - kfree(entry); 2440 + if (dev->devx_whitelist_uid) { 2441 + mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); 2442 + 
mutex_lock(&dev->devx_event_table.event_xa_lock); 2443 + xa_for_each(&table->event_xa, id, entry) { 2444 + event = entry; 2445 + list_for_each_entry_safe( 2446 + sub, tmp, &event->unaffiliated_list, xa_list) 2447 + devx_cleanup_subscription(dev, sub); 2448 + kfree(entry); 2449 + } 2450 + mutex_unlock(&dev->devx_event_table.event_xa_lock); 2451 + xa_destroy(&table->event_xa); 2452 + 2453 + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); 2398 2454 } 2399 - mutex_unlock(&dev->devx_event_table.event_xa_lock); 2400 - xa_destroy(&table->event_xa); 2401 2455 } 2402 2456 2403 2457 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+45
drivers/infiniband/hw/mlx5/devx.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* 3 + * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #ifndef _MLX5_IB_DEVX_H 7 + #define _MLX5_IB_DEVX_H 8 + 9 + #include "mlx5_ib.h" 10 + 11 + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) 12 + struct devx_obj { 13 + struct mlx5_ib_dev *ib_dev; 14 + u64 obj_id; 15 + u32 dinlen; /* destroy inbox length */ 16 + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; 17 + u32 flags; 18 + union { 19 + struct mlx5_ib_devx_mr devx_mr; 20 + struct mlx5_core_dct core_dct; 21 + struct mlx5_core_cq core_cq; 22 + u32 flow_counter_bulk_size; 23 + }; 24 + struct list_head event_sub; /* holds devx_event_subscription entries */ 25 + }; 26 + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 27 + int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); 28 + void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); 29 + int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); 30 + void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); 31 + #else 32 + static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) 33 + { 34 + return -EOPNOTSUPP; 35 + } 36 + static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} 37 + static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) 38 + { 39 + return 0; 40 + } 41 + static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) 42 + { 43 + } 44 + #endif 45 + #endif /* _MLX5_IB_DEVX_H */
-765
drivers/infiniband/hw/mlx5/flow.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 - /* 3 - * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 4 - */ 5 - 6 - #include <rdma/ib_user_verbs.h> 7 - #include <rdma/ib_verbs.h> 8 - #include <rdma/uverbs_types.h> 9 - #include <rdma/uverbs_ioctl.h> 10 - #include <rdma/uverbs_std_types.h> 11 - #include <rdma/mlx5_user_ioctl_cmds.h> 12 - #include <rdma/mlx5_user_ioctl_verbs.h> 13 - #include <rdma/ib_umem.h> 14 - #include <linux/mlx5/driver.h> 15 - #include <linux/mlx5/fs.h> 16 - #include "mlx5_ib.h" 17 - 18 - #define UVERBS_MODULE_NAME mlx5_ib 19 - #include <rdma/uverbs_named_ioctl.h> 20 - 21 - static int 22 - mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, 23 - enum mlx5_flow_namespace_type *namespace) 24 - { 25 - switch (table_type) { 26 - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: 27 - *namespace = MLX5_FLOW_NAMESPACE_BYPASS; 28 - break; 29 - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: 30 - *namespace = MLX5_FLOW_NAMESPACE_EGRESS; 31 - break; 32 - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: 33 - *namespace = MLX5_FLOW_NAMESPACE_FDB; 34 - break; 35 - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: 36 - *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; 37 - break; 38 - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: 39 - *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; 40 - break; 41 - default: 42 - return -EINVAL; 43 - } 44 - 45 - return 0; 46 - } 47 - 48 - static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { 49 - [MLX5_IB_FLOW_TYPE_NORMAL] = { 50 - .type = UVERBS_ATTR_TYPE_PTR_IN, 51 - .u.ptr = { 52 - .len = sizeof(u16), /* data is priority */ 53 - .min_len = sizeof(u16), 54 - } 55 - }, 56 - [MLX5_IB_FLOW_TYPE_SNIFFER] = { 57 - .type = UVERBS_ATTR_TYPE_PTR_IN, 58 - UVERBS_ATTR_NO_DATA(), 59 - }, 60 - [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { 61 - .type = UVERBS_ATTR_TYPE_PTR_IN, 62 - UVERBS_ATTR_NO_DATA(), 63 - }, 64 - [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { 65 - .type = UVERBS_ATTR_TYPE_PTR_IN, 66 - UVERBS_ATTR_NO_DATA(), 67 - }, 68 
- }; 69 - 70 - static int get_dests(struct uverbs_attr_bundle *attrs, 71 - struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, 72 - int *dest_type, struct ib_qp **qp, u32 *flags) 73 - { 74 - bool dest_devx, dest_qp; 75 - void *devx_obj; 76 - int err; 77 - 78 - dest_devx = uverbs_attr_is_valid(attrs, 79 - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); 80 - dest_qp = uverbs_attr_is_valid(attrs, 81 - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); 82 - 83 - *flags = 0; 84 - err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, 85 - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | 86 - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); 87 - if (err) 88 - return err; 89 - 90 - /* Both flags are not allowed */ 91 - if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && 92 - *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) 93 - return -EINVAL; 94 - 95 - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { 96 - if (dest_devx && (dest_qp || *flags)) 97 - return -EINVAL; 98 - else if (dest_qp && *flags) 99 - return -EINVAL; 100 - } 101 - 102 - /* Allow only DEVX object, drop as dest for FDB */ 103 - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || 104 - (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) 105 - return -EINVAL; 106 - 107 - /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ 108 - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && 109 - ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) 110 - return -EINVAL; 111 - 112 - *qp = NULL; 113 - if (dest_devx) { 114 - devx_obj = 115 - uverbs_attr_get_obj(attrs, 116 - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); 117 - 118 - /* Verify that the given DEVX object is a flow 119 - * steering destination. 
120 - */ 121 - if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type)) 122 - return -EINVAL; 123 - /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ 124 - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || 125 - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && 126 - *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) 127 - return -EINVAL; 128 - } else if (dest_qp) { 129 - struct mlx5_ib_qp *mqp; 130 - 131 - *qp = uverbs_attr_get_obj(attrs, 132 - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); 133 - if (IS_ERR(*qp)) 134 - return PTR_ERR(*qp); 135 - 136 - if ((*qp)->qp_type != IB_QPT_RAW_PACKET) 137 - return -EINVAL; 138 - 139 - mqp = to_mqp(*qp); 140 - if (mqp->is_rss) 141 - *dest_id = mqp->rss_qp.tirn; 142 - else 143 - *dest_id = mqp->raw_packet_qp.rq.tirn; 144 - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; 145 - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { 146 - *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; 147 - } 148 - 149 - if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && 150 - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) 151 - return -EINVAL; 152 - 153 - return 0; 154 - } 155 - 156 - #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 157 - static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( 158 - struct uverbs_attr_bundle *attrs) 159 - { 160 - struct mlx5_flow_context flow_context = {.flow_tag = 161 - MLX5_FS_DEFAULT_FLOW_TAG}; 162 - u32 *offset_attr, offset = 0, counter_id = 0; 163 - int dest_id, dest_type, inlen, len, ret, i; 164 - struct mlx5_ib_flow_handler *flow_handler; 165 - struct mlx5_ib_flow_matcher *fs_matcher; 166 - struct ib_uobject **arr_flow_actions; 167 - struct ib_uflow_resources *uflow_res; 168 - struct mlx5_flow_act flow_act = {}; 169 - struct ib_qp *qp = NULL; 170 - void *devx_obj, *cmd_in; 171 - struct ib_uobject *uobj; 172 - struct mlx5_ib_dev *dev; 173 - u32 flags; 174 - 175 - if (!capable(CAP_NET_RAW)) 176 - return -EPERM; 177 - 178 - fs_matcher = uverbs_attr_get_obj(attrs, 179 - 
MLX5_IB_ATTR_CREATE_FLOW_MATCHER); 180 - uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); 181 - dev = mlx5_udata_to_mdev(&attrs->driver_udata); 182 - 183 - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) 184 - return -EINVAL; 185 - 186 - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) 187 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; 188 - 189 - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) 190 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 191 - 192 - len = uverbs_attr_get_uobjs_arr(attrs, 193 - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); 194 - if (len) { 195 - devx_obj = arr_flow_actions[0]->object; 196 - 197 - if (uverbs_attr_is_valid(attrs, 198 - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { 199 - 200 - int num_offsets = uverbs_attr_ptr_get_array_size( 201 - attrs, 202 - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, 203 - sizeof(u32)); 204 - 205 - if (num_offsets != 1) 206 - return -EINVAL; 207 - 208 - offset_attr = uverbs_attr_get_alloced_ptr( 209 - attrs, 210 - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); 211 - offset = *offset_attr; 212 - } 213 - 214 - if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, 215 - &counter_id)) 216 - return -EINVAL; 217 - 218 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 219 - } 220 - 221 - cmd_in = uverbs_attr_get_alloced_ptr( 222 - attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 223 - inlen = uverbs_attr_get_len(attrs, 224 - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 225 - 226 - uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); 227 - if (!uflow_res) 228 - return -ENOMEM; 229 - 230 - len = uverbs_attr_get_uobjs_arr(attrs, 231 - MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); 232 - for (i = 0; i < len; i++) { 233 - struct mlx5_ib_flow_action *maction = 234 - to_mflow_act(arr_flow_actions[i]->object); 235 - 236 - ret = parse_flow_flow_action(maction, false, &flow_act); 237 - if 
(ret) 238 - goto err_out; 239 - flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, 240 - arr_flow_actions[i]->object); 241 - } 242 - 243 - ret = uverbs_copy_from(&flow_context.flow_tag, attrs, 244 - MLX5_IB_ATTR_CREATE_FLOW_TAG); 245 - if (!ret) { 246 - if (flow_context.flow_tag >= BIT(24)) { 247 - ret = -EINVAL; 248 - goto err_out; 249 - } 250 - flow_context.flags |= FLOW_CONTEXT_HAS_TAG; 251 - } 252 - 253 - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, 254 - &flow_context, 255 - &flow_act, 256 - counter_id, 257 - cmd_in, inlen, 258 - dest_id, dest_type); 259 - if (IS_ERR(flow_handler)) { 260 - ret = PTR_ERR(flow_handler); 261 - goto err_out; 262 - } 263 - 264 - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); 265 - 266 - return 0; 267 - err_out: 268 - ib_uverbs_flow_resources_free(uflow_res); 269 - return ret; 270 - } 271 - 272 - static int flow_matcher_cleanup(struct ib_uobject *uobject, 273 - enum rdma_remove_reason why, 274 - struct uverbs_attr_bundle *attrs) 275 - { 276 - struct mlx5_ib_flow_matcher *obj = uobject->object; 277 - int ret; 278 - 279 - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); 280 - if (ret) 281 - return ret; 282 - 283 - kfree(obj); 284 - return 0; 285 - } 286 - 287 - static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, 288 - struct mlx5_ib_flow_matcher *obj) 289 - { 290 - enum mlx5_ib_uapi_flow_table_type ft_type = 291 - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; 292 - u32 flags; 293 - int err; 294 - 295 - /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older 296 - * users should switch to it. 
We leave this to not break userspace 297 - */ 298 - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && 299 - uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) 300 - return -EINVAL; 301 - 302 - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { 303 - err = uverbs_get_const(&ft_type, attrs, 304 - MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); 305 - if (err) 306 - return err; 307 - 308 - err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); 309 - if (err) 310 - return err; 311 - 312 - return 0; 313 - } 314 - 315 - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { 316 - err = uverbs_get_flags32(&flags, attrs, 317 - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 318 - IB_FLOW_ATTR_FLAGS_EGRESS); 319 - if (err) 320 - return err; 321 - 322 - if (flags) { 323 - mlx5_ib_ft_type_to_namespace( 324 - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, 325 - &obj->ns_type); 326 - return 0; 327 - } 328 - } 329 - 330 - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; 331 - 332 - return 0; 333 - } 334 - 335 - static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( 336 - struct uverbs_attr_bundle *attrs) 337 - { 338 - struct ib_uobject *uobj = uverbs_attr_get_uobject( 339 - attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); 340 - struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); 341 - struct mlx5_ib_flow_matcher *obj; 342 - int err; 343 - 344 - obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); 345 - if (!obj) 346 - return -ENOMEM; 347 - 348 - obj->mask_len = uverbs_attr_get_len( 349 - attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); 350 - err = uverbs_copy_from(&obj->matcher_mask, 351 - attrs, 352 - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); 353 - if (err) 354 - goto end; 355 - 356 - obj->flow_type = uverbs_attr_get_enum_id( 357 - attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); 358 - 359 - if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { 360 - err = uverbs_copy_from(&obj->priority, 361 - attrs, 362 - 
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); 363 - if (err) 364 - goto end; 365 - } 366 - 367 - err = uverbs_copy_from(&obj->match_criteria_enable, 368 - attrs, 369 - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); 370 - if (err) 371 - goto end; 372 - 373 - err = mlx5_ib_matcher_ns(attrs, obj); 374 - if (err) 375 - goto end; 376 - 377 - uobj->object = obj; 378 - obj->mdev = dev->mdev; 379 - atomic_set(&obj->usecnt, 0); 380 - return 0; 381 - 382 - end: 383 - kfree(obj); 384 - return err; 385 - } 386 - 387 - void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) 388 - { 389 - switch (maction->flow_action_raw.sub_type) { 390 - case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: 391 - mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, 392 - maction->flow_action_raw.modify_hdr); 393 - break; 394 - case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: 395 - mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, 396 - maction->flow_action_raw.pkt_reformat); 397 - break; 398 - case MLX5_IB_FLOW_ACTION_DECAP: 399 - break; 400 - default: 401 - break; 402 - } 403 - } 404 - 405 - static struct ib_flow_action * 406 - mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, 407 - enum mlx5_ib_uapi_flow_table_type ft_type, 408 - u8 num_actions, void *in) 409 - { 410 - enum mlx5_flow_namespace_type namespace; 411 - struct mlx5_ib_flow_action *maction; 412 - int ret; 413 - 414 - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); 415 - if (ret) 416 - return ERR_PTR(-EINVAL); 417 - 418 - maction = kzalloc(sizeof(*maction), GFP_KERNEL); 419 - if (!maction) 420 - return ERR_PTR(-ENOMEM); 421 - 422 - maction->flow_action_raw.modify_hdr = 423 - mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); 424 - 425 - if (IS_ERR(maction->flow_action_raw.modify_hdr)) { 426 - ret = PTR_ERR(maction->flow_action_raw.modify_hdr); 427 - kfree(maction); 428 - return ERR_PTR(ret); 429 - } 430 - maction->flow_action_raw.sub_type = 431 - MLX5_IB_FLOW_ACTION_MODIFY_HEADER; 432 - 
maction->flow_action_raw.dev = dev; 433 - 434 - return &maction->ib_action; 435 - } 436 - 437 - static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) 438 - { 439 - return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 440 - max_modify_header_actions) || 441 - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, 442 - max_modify_header_actions) || 443 - MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, 444 - max_modify_header_actions); 445 - } 446 - 447 - static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( 448 - struct uverbs_attr_bundle *attrs) 449 - { 450 - struct ib_uobject *uobj = uverbs_attr_get_uobject( 451 - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); 452 - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); 453 - enum mlx5_ib_uapi_flow_table_type ft_type; 454 - struct ib_flow_action *action; 455 - int num_actions; 456 - void *in; 457 - int ret; 458 - 459 - if (!mlx5_ib_modify_header_supported(mdev)) 460 - return -EOPNOTSUPP; 461 - 462 - in = uverbs_attr_get_alloced_ptr(attrs, 463 - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); 464 - 465 - num_actions = uverbs_attr_ptr_get_array_size( 466 - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 467 - MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); 468 - if (num_actions < 0) 469 - return num_actions; 470 - 471 - ret = uverbs_get_const(&ft_type, attrs, 472 - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); 473 - if (ret) 474 - return ret; 475 - action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); 476 - if (IS_ERR(action)) 477 - return PTR_ERR(action); 478 - 479 - uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, 480 - IB_FLOW_ACTION_UNSPECIFIED); 481 - 482 - return 0; 483 - } 484 - 485 - static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, 486 - u8 packet_reformat_type, 487 - u8 ft_type) 488 - { 489 - switch (packet_reformat_type) { 490 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 491 - if (ft_type == 
MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 492 - return MLX5_CAP_FLOWTABLE(ibdev->mdev, 493 - encap_general_header); 494 - break; 495 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 496 - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 497 - return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, 498 - reformat_l2_to_l3_tunnel); 499 - break; 500 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 501 - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 502 - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, 503 - reformat_l3_tunnel_to_l2); 504 - break; 505 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: 506 - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 507 - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); 508 - break; 509 - default: 510 - break; 511 - } 512 - 513 - return false; 514 - } 515 - 516 - static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) 517 - { 518 - switch (dv_prt) { 519 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 520 - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; 521 - break; 522 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 523 - *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; 524 - break; 525 - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 526 - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; 527 - break; 528 - default: 529 - return -EINVAL; 530 - } 531 - 532 - return 0; 533 - } 534 - 535 - static int mlx5_ib_flow_action_create_packet_reformat_ctx( 536 - struct mlx5_ib_dev *dev, 537 - struct mlx5_ib_flow_action *maction, 538 - u8 ft_type, u8 dv_prt, 539 - void *in, size_t len) 540 - { 541 - enum mlx5_flow_namespace_type namespace; 542 - u8 prm_prt; 543 - int ret; 544 - 545 - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); 546 - if (ret) 547 - return ret; 548 - 549 - ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); 550 - if (ret) 551 - return ret; 552 - 553 - 
maction->flow_action_raw.pkt_reformat = 554 - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, 555 - in, namespace); 556 - if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { 557 - ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); 558 - return ret; 559 - } 560 - 561 - maction->flow_action_raw.sub_type = 562 - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; 563 - maction->flow_action_raw.dev = dev; 564 - 565 - return 0; 566 - } 567 - 568 - static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( 569 - struct uverbs_attr_bundle *attrs) 570 - { 571 - struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, 572 - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); 573 - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); 574 - enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; 575 - enum mlx5_ib_uapi_flow_table_type ft_type; 576 - struct mlx5_ib_flow_action *maction; 577 - int ret; 578 - 579 - ret = uverbs_get_const(&ft_type, attrs, 580 - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); 581 - if (ret) 582 - return ret; 583 - 584 - ret = uverbs_get_const(&dv_prt, attrs, 585 - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); 586 - if (ret) 587 - return ret; 588 - 589 - if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) 590 - return -EOPNOTSUPP; 591 - 592 - maction = kzalloc(sizeof(*maction), GFP_KERNEL); 593 - if (!maction) 594 - return -ENOMEM; 595 - 596 - if (dv_prt == 597 - MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { 598 - maction->flow_action_raw.sub_type = 599 - MLX5_IB_FLOW_ACTION_DECAP; 600 - maction->flow_action_raw.dev = mdev; 601 - } else { 602 - void *in; 603 - int len; 604 - 605 - in = uverbs_attr_get_alloced_ptr(attrs, 606 - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 607 - if (IS_ERR(in)) { 608 - ret = PTR_ERR(in); 609 - goto free_maction; 610 - } 611 - 612 - len = uverbs_attr_get_len(attrs, 613 - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 614 - 615 - ret = 
mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, 616 - maction, ft_type, dv_prt, in, len); 617 - if (ret) 618 - goto free_maction; 619 - } 620 - 621 - uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, 622 - IB_FLOW_ACTION_UNSPECIFIED); 623 - return 0; 624 - 625 - free_maction: 626 - kfree(maction); 627 - return ret; 628 - } 629 - 630 - DECLARE_UVERBS_NAMED_METHOD( 631 - MLX5_IB_METHOD_CREATE_FLOW, 632 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, 633 - UVERBS_OBJECT_FLOW, 634 - UVERBS_ACCESS_NEW, 635 - UA_MANDATORY), 636 - UVERBS_ATTR_PTR_IN( 637 - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, 638 - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), 639 - UA_MANDATORY, 640 - UA_ALLOC_AND_COPY), 641 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, 642 - MLX5_IB_OBJECT_FLOW_MATCHER, 643 - UVERBS_ACCESS_READ, 644 - UA_MANDATORY), 645 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, 646 - UVERBS_OBJECT_QP, 647 - UVERBS_ACCESS_READ), 648 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 649 - MLX5_IB_OBJECT_DEVX_OBJ, 650 - UVERBS_ACCESS_READ), 651 - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, 652 - UVERBS_OBJECT_FLOW_ACTION, 653 - UVERBS_ACCESS_READ, 1, 654 - MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, 655 - UA_OPTIONAL), 656 - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, 657 - UVERBS_ATTR_TYPE(u32), 658 - UA_OPTIONAL), 659 - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, 660 - MLX5_IB_OBJECT_DEVX_OBJ, 661 - UVERBS_ACCESS_READ, 1, 1, 662 - UA_OPTIONAL), 663 - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, 664 - UVERBS_ATTR_MIN_SIZE(sizeof(u32)), 665 - UA_OPTIONAL, 666 - UA_ALLOC_AND_COPY), 667 - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, 668 - enum mlx5_ib_create_flow_flags, 669 - UA_OPTIONAL)); 670 - 671 - DECLARE_UVERBS_NAMED_METHOD_DESTROY( 672 - MLX5_IB_METHOD_DESTROY_FLOW, 673 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, 674 - UVERBS_OBJECT_FLOW, 675 
- UVERBS_ACCESS_DESTROY, 676 - UA_MANDATORY)); 677 - 678 - ADD_UVERBS_METHODS(mlx5_ib_fs, 679 - UVERBS_OBJECT_FLOW, 680 - &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), 681 - &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); 682 - 683 - DECLARE_UVERBS_NAMED_METHOD( 684 - MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, 685 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, 686 - UVERBS_OBJECT_FLOW_ACTION, 687 - UVERBS_ACCESS_NEW, 688 - UA_MANDATORY), 689 - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 690 - UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( 691 - set_add_copy_action_in_auto)), 692 - UA_MANDATORY, 693 - UA_ALLOC_AND_COPY), 694 - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, 695 - enum mlx5_ib_uapi_flow_table_type, 696 - UA_MANDATORY)); 697 - 698 - DECLARE_UVERBS_NAMED_METHOD( 699 - MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 700 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, 701 - UVERBS_OBJECT_FLOW_ACTION, 702 - UVERBS_ACCESS_NEW, 703 - UA_MANDATORY), 704 - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, 705 - UVERBS_ATTR_MIN_SIZE(1), 706 - UA_ALLOC_AND_COPY, 707 - UA_OPTIONAL), 708 - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, 709 - enum mlx5_ib_uapi_flow_action_packet_reformat_type, 710 - UA_MANDATORY), 711 - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, 712 - enum mlx5_ib_uapi_flow_table_type, 713 - UA_MANDATORY)); 714 - 715 - ADD_UVERBS_METHODS( 716 - mlx5_ib_flow_actions, 717 - UVERBS_OBJECT_FLOW_ACTION, 718 - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), 719 - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); 720 - 721 - DECLARE_UVERBS_NAMED_METHOD( 722 - MLX5_IB_METHOD_FLOW_MATCHER_CREATE, 723 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, 724 - MLX5_IB_OBJECT_FLOW_MATCHER, 725 - UVERBS_ACCESS_NEW, 726 - UA_MANDATORY), 727 - UVERBS_ATTR_PTR_IN( 728 - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, 
729 - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), 730 - UA_MANDATORY), 731 - UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, 732 - mlx5_ib_flow_type, 733 - UA_MANDATORY), 734 - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 735 - UVERBS_ATTR_TYPE(u8), 736 - UA_MANDATORY), 737 - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 738 - enum ib_flow_flags, 739 - UA_OPTIONAL), 740 - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, 741 - enum mlx5_ib_uapi_flow_table_type, 742 - UA_OPTIONAL)); 743 - 744 - DECLARE_UVERBS_NAMED_METHOD_DESTROY( 745 - MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, 746 - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, 747 - MLX5_IB_OBJECT_FLOW_MATCHER, 748 - UVERBS_ACCESS_DESTROY, 749 - UA_MANDATORY)); 750 - 751 - DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, 752 - UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), 753 - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), 754 - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); 755 - 756 - const struct uapi_definition mlx5_ib_flow_defs[] = { 757 - UAPI_DEF_CHAIN_OBJ_TREE_NAMED( 758 - MLX5_IB_OBJECT_FLOW_MATCHER), 759 - UAPI_DEF_CHAIN_OBJ_TREE( 760 - UVERBS_OBJECT_FLOW, 761 - &mlx5_ib_fs), 762 - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, 763 - &mlx5_ib_flow_actions), 764 - {}, 765 - };
+2516
drivers/infiniband/hw/mlx5/fs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #include <rdma/ib_user_verbs.h> 7 + #include <rdma/ib_verbs.h> 8 + #include <rdma/uverbs_types.h> 9 + #include <rdma/uverbs_ioctl.h> 10 + #include <rdma/uverbs_std_types.h> 11 + #include <rdma/mlx5_user_ioctl_cmds.h> 12 + #include <rdma/mlx5_user_ioctl_verbs.h> 13 + #include <rdma/ib_umem.h> 14 + #include <linux/mlx5/driver.h> 15 + #include <linux/mlx5/fs.h> 16 + #include <linux/mlx5/fs_helpers.h> 17 + #include <linux/mlx5/accel.h> 18 + #include <linux/mlx5/eswitch.h> 19 + #include "mlx5_ib.h" 20 + #include "counters.h" 21 + #include "devx.h" 22 + #include "fs.h" 23 + 24 + #define UVERBS_MODULE_NAME mlx5_ib 25 + #include <rdma/uverbs_named_ioctl.h> 26 + 27 + enum { 28 + MATCH_CRITERIA_ENABLE_OUTER_BIT, 29 + MATCH_CRITERIA_ENABLE_MISC_BIT, 30 + MATCH_CRITERIA_ENABLE_INNER_BIT, 31 + MATCH_CRITERIA_ENABLE_MISC2_BIT 32 + }; 33 + 34 + #define HEADER_IS_ZERO(match_criteria, headers) \ 35 + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 36 + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ 37 + 38 + static u8 get_match_criteria_enable(u32 *match_criteria) 39 + { 40 + u8 match_criteria_enable; 41 + 42 + match_criteria_enable = 43 + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << 44 + MATCH_CRITERIA_ENABLE_OUTER_BIT; 45 + match_criteria_enable |= 46 + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << 47 + MATCH_CRITERIA_ENABLE_MISC_BIT; 48 + match_criteria_enable |= 49 + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << 50 + MATCH_CRITERIA_ENABLE_INNER_BIT; 51 + match_criteria_enable |= 52 + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << 53 + MATCH_CRITERIA_ENABLE_MISC2_BIT; 54 + 55 + return match_criteria_enable; 56 + } 57 + 58 + static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) 59 + { 60 + u8 entry_mask; 61 + u8 entry_val; 62 + int err = 0; 63 + 64 + if 
(!mask) 65 + goto out; 66 + 67 + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, 68 + ip_protocol); 69 + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, 70 + ip_protocol); 71 + if (!entry_mask) { 72 + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); 73 + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); 74 + goto out; 75 + } 76 + /* Don't override existing ip protocol */ 77 + if (mask != entry_mask || val != entry_val) 78 + err = -EINVAL; 79 + out: 80 + return err; 81 + } 82 + 83 + static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, 84 + bool inner) 85 + { 86 + if (inner) { 87 + MLX5_SET(fte_match_set_misc, 88 + misc_c, inner_ipv6_flow_label, mask); 89 + MLX5_SET(fte_match_set_misc, 90 + misc_v, inner_ipv6_flow_label, val); 91 + } else { 92 + MLX5_SET(fte_match_set_misc, 93 + misc_c, outer_ipv6_flow_label, mask); 94 + MLX5_SET(fte_match_set_misc, 95 + misc_v, outer_ipv6_flow_label, val); 96 + } 97 + } 98 + 99 + static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) 100 + { 101 + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); 102 + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); 103 + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); 104 + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); 105 + } 106 + 107 + static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) 108 + { 109 + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && 110 + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) 111 + return -EOPNOTSUPP; 112 + 113 + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && 114 + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) 115 + return -EOPNOTSUPP; 116 + 117 + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && 118 + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) 119 + return -EOPNOTSUPP; 120 + 121 + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && 122 + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) 123 + return -EOPNOTSUPP; 124 
+ 125 + return 0; 126 + } 127 + 128 + #define LAST_ETH_FIELD vlan_tag 129 + #define LAST_IB_FIELD sl 130 + #define LAST_IPV4_FIELD tos 131 + #define LAST_IPV6_FIELD traffic_class 132 + #define LAST_TCP_UDP_FIELD src_port 133 + #define LAST_TUNNEL_FIELD tunnel_id 134 + #define LAST_FLOW_TAG_FIELD tag_id 135 + #define LAST_DROP_FIELD size 136 + #define LAST_COUNTERS_FIELD counters 137 + 138 + /* Field is the last supported field */ 139 + #define FIELDS_NOT_SUPPORTED(filter, field)\ 140 + memchr_inv((void *)&filter.field +\ 141 + sizeof(filter.field), 0,\ 142 + sizeof(filter) -\ 143 + offsetof(typeof(filter), field) -\ 144 + sizeof(filter.field)) 145 + 146 + int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, 147 + bool is_egress, 148 + struct mlx5_flow_act *action) 149 + { 150 + 151 + switch (maction->ib_action.type) { 152 + case IB_FLOW_ACTION_ESP: 153 + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 154 + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) 155 + return -EINVAL; 156 + /* Currently only AES_GCM keymat is supported by the driver */ 157 + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; 158 + action->action |= is_egress ? 
159 + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : 160 + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; 161 + return 0; 162 + case IB_FLOW_ACTION_UNSPECIFIED: 163 + if (maction->flow_action_raw.sub_type == 164 + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { 165 + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 166 + return -EINVAL; 167 + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 168 + action->modify_hdr = 169 + maction->flow_action_raw.modify_hdr; 170 + return 0; 171 + } 172 + if (maction->flow_action_raw.sub_type == 173 + MLX5_IB_FLOW_ACTION_DECAP) { 174 + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) 175 + return -EINVAL; 176 + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 177 + return 0; 178 + } 179 + if (maction->flow_action_raw.sub_type == 180 + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { 181 + if (action->action & 182 + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) 183 + return -EINVAL; 184 + action->action |= 185 + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; 186 + action->pkt_reformat = 187 + maction->flow_action_raw.pkt_reformat; 188 + return 0; 189 + } 190 + fallthrough; 191 + default: 192 + return -EOPNOTSUPP; 193 + } 194 + } 195 + 196 + static int parse_flow_attr(struct mlx5_core_dev *mdev, 197 + struct mlx5_flow_spec *spec, 198 + const union ib_flow_spec *ib_spec, 199 + const struct ib_flow_attr *flow_attr, 200 + struct mlx5_flow_act *action, u32 prev_type) 201 + { 202 + struct mlx5_flow_context *flow_context = &spec->flow_context; 203 + u32 *match_c = spec->match_criteria; 204 + u32 *match_v = spec->match_value; 205 + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, 206 + misc_parameters); 207 + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, 208 + misc_parameters); 209 + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, 210 + misc_parameters_2); 211 + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, 212 + misc_parameters_2); 213 + void *headers_c; 214 + void *headers_v; 215 + int match_ipv; 216 + int ret; 217 + 218 + 
	/*
	 * Select inner vs. outer packet headers inside the mlx5 match
	 * parameter blobs depending on the IB_FLOW_SPEC_INNER flag.
	 * match_ipv records whether the device can match on the parsed
	 * ip_version field for the chosen header set (per FW capability).
	 */
	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
					 inner_headers);
		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.inner_ip_version);
	} else {
		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
					 outer_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
					 outer_headers);
		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.outer_ip_version);
	}

	/* Dispatch on the spec type with the INNER flag masked off. */
	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
	case IB_FLOW_SPEC_ETH:
		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
			return -EOPNOTSUPP;

		/* Destination and source MAC, mask (headers_c) and value
		 * (headers_v) side by side.
		 */
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				ib_spec->eth.mask.dst_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				ib_spec->eth.val.dst_mac);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				ib_spec->eth.mask.src_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				ib_spec->eth.val.src_mac);

		if (ib_spec->eth.mask.vlan_tag) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 cvlan_tag, 1);

			/* 802.1Q TCI layout: VID in bits 0..11, CFI/DEI in
			 * bit 12, priority (PCP) in bits 13..15 — hence the
			 * shifts below.
			 */
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_cfi,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_cfi,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_prio,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_prio,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
		}
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 ethertype, ntohs(ib_spec->eth.val.ether_type));
		break;
	case IB_FLOW_SPEC_IPV4:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
			return -EOPNOTSUPP;

		/* Prefer the parsed ip_version field when the device supports
		 * it, otherwise fall back to an exact ethertype match.
		 */
		if (match_ipv) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ip_version, 0xf);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ip_version, MLX5_FS_IPV4_VERSION);
		} else {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ethertype, 0xffff);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ethertype, ETH_P_IP);
		}

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.src_ip,
		       sizeof(ib_spec->ipv4.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.src_ip,
		       sizeof(ib_spec->ipv4.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.dst_ip,
		       sizeof(ib_spec->ipv4.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.dst_ip,
		       sizeof(ib_spec->ipv4.val.dst_ip));

		set_tos(headers_c, headers_v,
			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

		if (set_proto(headers_c, headers_v,
			      ib_spec->ipv4.mask.proto,
			      ib_spec->ipv4.val.proto))
			return -EINVAL;
		break;
	case IB_FLOW_SPEC_IPV6:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
			return -EOPNOTSUPP;

		/* Same ip_version vs. ethertype fallback as the IPv4 case. */
		if (match_ipv) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ip_version, 0xf);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ip_version, MLX5_FS_IPV6_VERSION);
		} else {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ethertype, 0xffff);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ethertype, ETH_P_IPV6);
		}

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.src_ip,
		       sizeof(ib_spec->ipv6.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.src_ip,
		       sizeof(ib_spec->ipv6.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.dst_ip,
		       sizeof(ib_spec->ipv6.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.dst_ip,
		       sizeof(ib_spec->ipv6.val.dst_ip));

		set_tos(headers_c, headers_v,
			ib_spec->ipv6.mask.traffic_class,
			ib_spec->ipv6.val.traffic_class);

		if (set_proto(headers_c, headers_v,
			      ib_spec->ipv6.mask.next_hdr,
			      ib_spec->ipv6.val.next_hdr))
			return -EINVAL;

		set_flow_label(misc_params_c, misc_params_v,
			       ntohl(ib_spec->ipv6.mask.flow_label),
			       ntohl(ib_spec->ipv6.val.flow_label),
			       ib_spec->type & IB_FLOW_SPEC_INNER);
		break;
	case IB_FLOW_SPEC_ESP:
		/* Matching on the ESP sequence number is not supported. */
		if (ib_spec->esp.mask.seq)
			return -EOPNOTSUPP;

		MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
			 ntohl(ib_spec->esp.mask.spi));
		MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
			 ntohl(ib_spec->esp.val.spi));
		break;
	case IB_FLOW_SPEC_TCP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -EOPNOTSUPP;

		/* Pin ip_protocol to TCP; fails if a conflicting protocol
		 * was already requested by an earlier spec.
		 */
		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	case IB_FLOW_SPEC_UDP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -EOPNOTSUPP;

		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	case IB_FLOW_SPEC_GRE:
		/* Only the GRE protocol and key fields can be matched. */
		if (ib_spec->gre.mask.c_ks_res0_ver)
			return -EOPNOTSUPP;

		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 0xff);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 IPPROTO_GRE);

		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
			 ntohs(ib_spec->gre.mask.protocol));
		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
			 ntohs(ib_spec->gre.val.protocol));

		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
				    gre_key.nvgre.hi),
		       &ib_spec->gre.mask.key,
		       sizeof(ib_spec->gre.mask.key));
		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
				    gre_key.nvgre.hi),
		       &ib_spec->gre.val.key,
		       sizeof(ib_spec->gre.val.key));
		break;
	case IB_FLOW_SPEC_MPLS:
		/* Where the MPLS label lives in the match blob depends on
		 * what preceded it in the spec list (over UDP, over GRE, or
		 * as the first header at this encapsulation level); the
		 * relevant FW capability is checked for each placement.
		 */
		switch (prev_type) {
		case IB_FLOW_SPEC_UDP:
			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
						   ft_field_support.outer_first_mpls_over_udp),
						   &ib_spec->mpls.mask.tag))
				return -EOPNOTSUPP;

			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
					    outer_first_mpls_over_udp),
			       &ib_spec->mpls.val.tag,
			       sizeof(ib_spec->mpls.val.tag));
			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
					    outer_first_mpls_over_udp),
			       &ib_spec->mpls.mask.tag,
			       sizeof(ib_spec->mpls.mask.tag));
			break;
		case IB_FLOW_SPEC_GRE:
			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
						   ft_field_support.outer_first_mpls_over_gre),
						   &ib_spec->mpls.mask.tag))
				return -EOPNOTSUPP;

			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
					    outer_first_mpls_over_gre),
			       &ib_spec->mpls.val.tag,
			       sizeof(ib_spec->mpls.val.tag));
			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
					    outer_first_mpls_over_gre),
			       &ib_spec->mpls.mask.tag,
			       sizeof(ib_spec->mpls.mask.tag));
			break;
		default:
			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
							   ft_field_support.inner_first_mpls),
							   &ib_spec->mpls.mask.tag))
					return -EOPNOTSUPP;

				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
						    inner_first_mpls),
				       &ib_spec->mpls.val.tag,
				       sizeof(ib_spec->mpls.val.tag));
				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
						    inner_first_mpls),
				       &ib_spec->mpls.mask.tag,
				       sizeof(ib_spec->mpls.mask.tag));
			} else {
				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
							   ft_field_support.outer_first_mpls),
							   &ib_spec->mpls.mask.tag))
					return -EOPNOTSUPP;

				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
						    outer_first_mpls),
				       &ib_spec->mpls.val.tag,
				       sizeof(ib_spec->mpls.val.tag));
				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
						    outer_first_mpls),
				       &ib_spec->mpls.mask.tag,
				       sizeof(ib_spec->mpls.mask.tag));
			}
		}
		break;
	case IB_FLOW_SPEC_VXLAN_TUNNEL:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
					 LAST_TUNNEL_FIELD))
			return -EOPNOTSUPP;

		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
			 ntohl(ib_spec->tunnel.mask.tunnel_id));
		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
			 ntohl(ib_spec->tunnel.val.tunnel_id));
		break;
	case IB_FLOW_SPEC_ACTION_TAG:
		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
					 LAST_FLOW_TAG_FIELD))
			return -EOPNOTSUPP;
		/* Flow tags are limited to 24 bits. */
		if (ib_spec->flow_tag.tag_id >= BIT(24))
			return -EINVAL;

		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
		break;
	case IB_FLOW_SPEC_ACTION_DROP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
					 LAST_DROP_FIELD))
			return -EOPNOTSUPP;
		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
		break;
	case IB_FLOW_SPEC_ACTION_HANDLE:
		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
		if (ret)
			return ret;
		break;
	case IB_FLOW_SPEC_ACTION_COUNT:
		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
					 LAST_COUNTERS_FIELD))
			return -EOPNOTSUPP;

		/* for now support only one counters spec per flow */
		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
			return -EINVAL;

		action->counters = ib_spec->flow_count.counters;
		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* If a flow could catch both multicast and unicast packets,
* it won't fall into the multicast flow steering table and this rule 559 + * could steal other multicast packets. 560 + */ 561 + static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) 562 + { 563 + union ib_flow_spec *flow_spec; 564 + 565 + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || 566 + ib_attr->num_of_specs < 1) 567 + return false; 568 + 569 + flow_spec = (union ib_flow_spec *)(ib_attr + 1); 570 + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { 571 + struct ib_flow_spec_ipv4 *ipv4_spec; 572 + 573 + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; 574 + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) 575 + return true; 576 + 577 + return false; 578 + } 579 + 580 + if (flow_spec->type == IB_FLOW_SPEC_ETH) { 581 + struct ib_flow_spec_eth *eth_spec; 582 + 583 + eth_spec = (struct ib_flow_spec_eth *)flow_spec; 584 + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && 585 + is_multicast_ether_addr(eth_spec->val.dst_mac); 586 + } 587 + 588 + return false; 589 + } 590 + 591 + enum valid_spec { 592 + VALID_SPEC_INVALID, 593 + VALID_SPEC_VALID, 594 + VALID_SPEC_NA, 595 + }; 596 + 597 + static enum valid_spec 598 + is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, 599 + const struct mlx5_flow_spec *spec, 600 + const struct mlx5_flow_act *flow_act, 601 + bool egress) 602 + { 603 + const u32 *match_c = spec->match_criteria; 604 + bool is_crypto = 605 + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 606 + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); 607 + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); 608 + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; 609 + 610 + /* 611 + * Currently only crypto is supported in egress, when regular egress 612 + * rules would be supported, always return VALID_SPEC_NA. 613 + */ 614 + if (!is_crypto) 615 + return VALID_SPEC_NA; 616 + 617 + return is_crypto && is_ipsec && 618 + (!egress || (!is_drop && 619 + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? 
620 + VALID_SPEC_VALID : VALID_SPEC_INVALID; 621 + } 622 + 623 + static bool is_valid_spec(struct mlx5_core_dev *mdev, 624 + const struct mlx5_flow_spec *spec, 625 + const struct mlx5_flow_act *flow_act, 626 + bool egress) 627 + { 628 + /* We curretly only support ipsec egress flow */ 629 + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; 630 + } 631 + 632 + static bool is_valid_ethertype(struct mlx5_core_dev *mdev, 633 + const struct ib_flow_attr *flow_attr, 634 + bool check_inner) 635 + { 636 + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); 637 + int match_ipv = check_inner ? 638 + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 639 + ft_field_support.inner_ip_version) : 640 + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 641 + ft_field_support.outer_ip_version); 642 + int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; 643 + bool ipv4_spec_valid, ipv6_spec_valid; 644 + unsigned int ip_spec_type = 0; 645 + bool has_ethertype = false; 646 + unsigned int spec_index; 647 + bool mask_valid = true; 648 + u16 eth_type = 0; 649 + bool type_valid; 650 + 651 + /* Validate that ethertype is correct */ 652 + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 653 + if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && 654 + ib_spec->eth.mask.ether_type) { 655 + mask_valid = (ib_spec->eth.mask.ether_type == 656 + htons(0xffff)); 657 + has_ethertype = true; 658 + eth_type = ntohs(ib_spec->eth.val.ether_type); 659 + } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || 660 + (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { 661 + ip_spec_type = ib_spec->type; 662 + } 663 + ib_spec = (void *)ib_spec + ib_spec->size; 664 + } 665 + 666 + type_valid = (!has_ethertype) || (!ip_spec_type); 667 + if (!type_valid && mask_valid) { 668 + ipv4_spec_valid = (eth_type == ETH_P_IP) && 669 + (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); 670 + ipv6_spec_valid = (eth_type == ETH_P_IPV6) && 671 + (ip_spec_type == 
(IB_FLOW_SPEC_IPV6 | inner_bit)); 672 + 673 + type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || 674 + (((eth_type == ETH_P_MPLS_UC) || 675 + (eth_type == ETH_P_MPLS_MC)) && match_ipv); 676 + } 677 + 678 + return type_valid; 679 + } 680 + 681 + static bool is_valid_attr(struct mlx5_core_dev *mdev, 682 + const struct ib_flow_attr *flow_attr) 683 + { 684 + return is_valid_ethertype(mdev, flow_attr, false) && 685 + is_valid_ethertype(mdev, flow_attr, true); 686 + } 687 + 688 + static void put_flow_table(struct mlx5_ib_dev *dev, 689 + struct mlx5_ib_flow_prio *prio, bool ft_added) 690 + { 691 + prio->refcount -= !!ft_added; 692 + if (!prio->refcount) { 693 + mlx5_destroy_flow_table(prio->flow_table); 694 + prio->flow_table = NULL; 695 + } 696 + } 697 + 698 + static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 699 + { 700 + struct mlx5_ib_flow_handler *handler = container_of(flow_id, 701 + struct mlx5_ib_flow_handler, 702 + ibflow); 703 + struct mlx5_ib_flow_handler *iter, *tmp; 704 + struct mlx5_ib_dev *dev = handler->dev; 705 + 706 + mutex_lock(&dev->flow_db->lock); 707 + 708 + list_for_each_entry_safe(iter, tmp, &handler->list, list) { 709 + mlx5_del_flow_rules(iter->rule); 710 + put_flow_table(dev, iter->prio, true); 711 + list_del(&iter->list); 712 + kfree(iter); 713 + } 714 + 715 + mlx5_del_flow_rules(handler->rule); 716 + put_flow_table(dev, handler->prio, true); 717 + mlx5_ib_counters_clear_description(handler->ibcounters); 718 + mutex_unlock(&dev->flow_db->lock); 719 + if (handler->flow_matcher) 720 + atomic_dec(&handler->flow_matcher->usecnt); 721 + kfree(handler); 722 + 723 + return 0; 724 + } 725 + 726 + static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 727 + { 728 + priority *= 2; 729 + if (!dont_trap) 730 + priority++; 731 + return priority; 732 + } 733 + 734 + enum flow_table_type { 735 + MLX5_IB_FT_RX, 736 + MLX5_IB_FT_TX 737 + }; 738 + 739 + #define MLX5_FS_MAX_TYPES 6 740 + #define MLX5_FS_MAX_ENTRIES BIT(16) 741 + 742 + 
/*
 * Create the auto-grouped flow table backing @prio and initialise its
 * bookkeeping.  Returns @prio on success or an ERR_PTR propagated from
 * table creation.
 */
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
					   struct mlx5_ib_flow_prio *prio,
					   int priority,
					   int num_entries, int num_groups,
					   u32 flags)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;

	ft_attr.prio = priority;
	ft_attr.max_fte = num_entries;
	ft_attr.flags = flags;
	ft_attr.autogroup.max_num_groups = num_groups;
	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ft))
		return ERR_CAST(ft);

	prio->flow_table = ft;
	prio->refcount = 0;
	return prio;
}

/*
 * Pick the flow namespace, priority slot and table parameters matching
 * @flow_attr and @ft_type, creating the flow table on first use.
 * Returns the prio slot or an ERR_PTR (-EOPNOTSUPP when no suitable
 * namespace exists).
 */
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
						struct ib_flow_attr *flow_attr,
						enum flow_table_type ft_type)
{
	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_table *ft;
	int max_table_size;
	int num_entries;
	int num_groups;
	bool esw_encap;
	u32 flags = 0;
	int priority;

	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
						       log_max_ft_size));
	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
		enum mlx5_flow_namespace_type fn_type;

		/* Multicast-only, trapping rules go to the dedicated
		 * multicast priority; everything else maps via
		 * ib_prio_to_core_prio().
		 */
		if (flow_is_multicast_only(flow_attr) &&
		    !dont_trap)
			priority = MLX5_IB_FLOW_MCAST_PRIO;
		else
			priority = ib_prio_to_core_prio(flow_attr->priority,
							dont_trap);
		if (ft_type == MLX5_IB_FT_RX) {
			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
			prio = &dev->flow_db->prios[priority];
			/* Tunnel offload flags only when not a rep and the
			 * eswitch is not already doing encap/decap.
			 */
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
					reformat_l3_tunnel_to_l2))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		} else {
			max_table_size =
				BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
							      log_max_ft_size));
			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
			prio = &dev->flow_db->egress_prios[priority];
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		}
		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
		num_entries = MLX5_FS_MAX_ENTRIES;
		num_groups = MLX5_FS_MAX_TYPES;
	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
		ns = mlx5_get_flow_namespace(dev->mdev,
					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
		build_leftovers_ft_param(&priority,
					 &num_entries,
					 &num_groups);
		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
	} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					allow_sniffer_and_nic_rx_shared_tir))
			return ERR_PTR(-EOPNOTSUPP);

		ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
					     MLX5_FLOW_NAMESPACE_SNIFFER_RX :
					     MLX5_FLOW_NAMESPACE_SNIFFER_TX);

		prio = &dev->flow_db->sniffer[ft_type];
		priority = 0;
		num_entries = 1;
		num_groups = 1;
	}

	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

	/* Never ask for more entries than the device supports. */
	max_table_size = min_t(int, num_entries, max_table_size);

	ft = prio->flow_table;
	if (!ft)
		return _get_prio(ns, prio, priority, max_table_size, num_groups,
				 flags);

	return prio;
}

/*
 * For IPoIB offload flows, add a match on the underlay QP number so the
 * rule only hits traffic of this ULP.  Skipped when the device cannot
 * match on bth_dst_qp or no underlay QPN is in use.
 */
static void set_underlay_qp(struct mlx5_ib_dev *dev,
			    struct mlx5_flow_spec *spec,
			    u32 underlay_qpn)
{
	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
					   spec->match_criteria,
					   misc_parameters);
	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					   misc_parameters);

	if (underlay_qpn &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
				      ft_field_support.bth_dst_qp)) {
		MLX5_SET(fte_match_set_misc,
			 misc_params_v, bth_dst_qp, underlay_qpn);
		MLX5_SET(fte_match_set_misc,
			 misc_params_c, bth_dst_qp, 0xffffff);
	}
}

/*
 * Restrict a representor's rule to traffic coming from its vport, either
 * via the eswitch metadata register (when enabled) or the legacy
 * source_port field.
 */
static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_eswitch_rep *rep)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	void *misc;

	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters_2);

		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
			 mlx5_eswitch_get_vport_metadata_for_match(esw,
								   rep->vport));
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters_2);

		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
			 mlx5_eswitch_get_vport_metadata_mask());
	} else {
		/* Legacy mode: match directly on the source port field. */
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);

		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);

		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
	}
}

/*
 * Translate an ib_flow_attr spec list into an mlx5 rule and install it in
 * @ft_prio's table.  @underlay_qpn restricts the rule to an IPoIB underlay
 * QP (0 = none); @ucmd optionally carries user counters data.  Returns the
 * new handler or an ERR_PTR.
 */
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
						      struct mlx5_ib_flow_prio *ft_prio,
						      const struct ib_flow_attr *flow_attr,
						      struct mlx5_flow_destination *dst,
						      u32 underlay_qpn,
						      struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_flow_table	*ft = ft_prio->flow_table;
	struct mlx5_ib_flow_handler *handler;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	struct mlx5_flow_destination dest_arr[2] = {};
	struct mlx5_flow_destination *rule_dst = dest_arr;
	/* Specs are laid out back to back right after the attr header. */
	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
	unsigned int spec_index;
	u32 prev_type = 0;
	int err = 0;
	int dest_num = 0;
	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;

	if (!is_valid_attr(dev->mdev, flow_attr))
		return ERR_PTR(-EINVAL);

	/* Egress rules are not supported on representors. */
	if (dev->is_rep && is_egress)
		return ERR_PTR(-EINVAL);

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
	if (!handler || !spec) {
		err = -ENOMEM;
		goto free;
	}

	INIT_LIST_HEAD(&handler->list);

	/* Accumulate all specs into one match spec/action; prev_type lets
	 * parse_flow_attr() place context-dependent specs (e.g. MPLS).
	 */
	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
		err = parse_flow_attr(dev->mdev, spec,
				      ib_flow, flow_attr, &flow_act,
				      prev_type);
		if (err < 0)
			goto free;

		prev_type = ((union ib_flow_spec *)ib_flow)->type;
		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
	}

	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
		memcpy(&dest_arr[0], dst, sizeof(*dst));
		dest_num++;
	}

	if (!flow_is_multicast_only(flow_attr))
		set_underlay_qp(dev, spec, underlay_qpn);

	if (dev->is_rep) {
		struct mlx5_eswitch_rep *rep;

		rep = dev->port[flow_attr->port - 1].rep;
		if (!rep) {
			err = -EINVAL;
			goto free;
		}

		mlx5_ib_set_rule_source_port(dev, spec, rep);
	}

	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);

	if (is_egress &&
	    !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
		err = -EINVAL;
		goto free;
	}

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		struct mlx5_ib_mcounters *mcounters;

		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
		if (err)
			goto free;

		/* Attach the counter as an extra destination of the rule. */
		mcounters = to_mcounters(flow_act.counters);
		handler->ibcounters = flow_act.counters;
		dest_arr[dest_num].type =
			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest_arr[dest_num].counter_id =
			mlx5_fc_id(mcounters->hw_cntrs_hndl);
		dest_num++;
	}

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
		if (!dest_num)
			rule_dst = NULL;
	} else {
		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
			flow_act.action |=
				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
		if (is_egress)
			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
		else if (dest_num)
			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	}

	/* Flow tags make no sense on leftovers (default) rules. */
	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
			     spec->flow_context.flow_tag, flow_attr->type);
		err = -EINVAL;
		goto free;
	}
	handler->rule = mlx5_add_flow_rules(ft, spec,
					    &flow_act,
					    rule_dst, dest_num);

	if (IS_ERR(handler->rule)) {
		err = PTR_ERR(handler->rule);
		goto free;
	}

	ft_prio->refcount++;
	handler->prio = ft_prio;
	handler->dev = dev;

	ft_prio->flow_table = ft;
free:
	if (err && handler) {
		mlx5_ib_counters_clear_description(handler->ibcounters);
		kfree(handler);
	}
	kvfree(spec);
	return err ? ERR_PTR(err) : handler;
}

/* Convenience wrapper: no underlay QPN and no user command payload. */
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
						     struct mlx5_ib_flow_prio *ft_prio,
						     const struct ib_flow_attr *flow_attr,
						     struct mlx5_flow_destination *dst)
{
	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}

enum {
	LEFTOVERS_MC,
	LEFTOVERS_UC,
};

/*
 * Install the "leftovers" default rule(s): always a multicast catch-all,
 * plus a unicast catch-all chained onto it for ALL_DEFAULT attributes.
 */
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
							  struct mlx5_ib_flow_prio *ft_prio,
							  struct ib_flow_attr *flow_attr,
							  struct mlx5_flow_destination *dst)
{
	struct mlx5_ib_flow_handler *handler_ucast = NULL;
	struct mlx5_ib_flow_handler *handler = NULL;

	/* Canned attrs matching only on the multicast bit of the DMAC:
	 * mask 0x1/val 0x1 = multicast, mask 0x1/val 0x0 = unicast.
	 */
	static struct {
		struct ib_flow_attr	flow_attr;
		struct ib_flow_spec_eth eth_flow;
	} leftovers_specs[] = {
		[LEFTOVERS_MC] = {
			.flow_attr = {
				.num_of_specs = 1,
				.size = sizeof(leftovers_specs[0])
			},
			.eth_flow = {
				.type = IB_FLOW_SPEC_ETH,
				.size = sizeof(struct ib_flow_spec_eth),
				.mask = {.dst_mac = {0x1} },
				.val =  {.dst_mac = {0x1} }
			}
		},
		[LEFTOVERS_UC] = {
			.flow_attr = {
				.num_of_specs = 1,
				.size = sizeof(leftovers_specs[0])
			},
			.eth_flow = {
				.type = IB_FLOW_SPEC_ETH,
				.size = sizeof(struct ib_flow_spec_eth),
				.mask = {.dst_mac = {0x1} },
				.val = {.dst_mac = {} }
			}
		}
	};

	handler = create_flow_rule(dev, ft_prio,
				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
				   dst);
	if (!IS_ERR(handler) &&
	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
		handler_ucast = create_flow_rule(dev, ft_prio,
						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
						 dst);
		if (IS_ERR(handler_ucast)) {
			/* Unwind the multicast rule and propagate the error. */
			mlx5_del_flow_rules(handler->rule);
			ft_prio->refcount--;
			kfree(handler);
			handler = handler_ucast;
		} else {
			list_add(&handler_ucast->list, &handler->list);
		}
	}

	return handler;
}

/*
 * Create the RX and TX sniffer catch-all rules (empty match), chaining the
 * TX handler onto the RX one so both are destroyed together.
 */
static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
							struct mlx5_ib_flow_prio *ft_rx,
							struct mlx5_ib_flow_prio *ft_tx,
							struct mlx5_flow_destination *dst)
{
	struct mlx5_ib_flow_handler *handler_rx;
	struct mlx5_ib_flow_handler *handler_tx;
	int err;
	static const struct ib_flow_attr flow_attr  = {
		.num_of_specs = 0,
		.size = sizeof(flow_attr)
	};

	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
	if (IS_ERR(handler_rx)) {
		err = PTR_ERR(handler_rx);
		goto err;
	}

	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
	if (IS_ERR(handler_tx)) {
		err = PTR_ERR(handler_tx);
		goto err_tx;
	}

	list_add(&handler_tx->list, &handler_rx->list);

	return handler_rx;

err_tx:
	mlx5_del_flow_rules(handler_rx->rule);
	ft_rx->refcount--;
	kfree(handler_rx);
err:
	return ERR_PTR(err);
}

/*
 * ib_create_flow verb entry point: validate the user request (including
 * the optional counters ucmd in @udata) and install the matching rule(s).
 */
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
					   struct ib_flow_attr *flow_attr,
					   int domain,
					   struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_flow_handler *handler = NULL;
	struct mlx5_flow_destination *dst = NULL;
	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
	struct mlx5_ib_flow_prio *ft_prio;
	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
	size_t min_ucmd_sz, required_ucmd_sz;
	int err;
	int underlay_qpn;

	if (udata && udata->inlen) {
		/* Header must be present up to and including 'reserved'. */
		min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
			      sizeof(ucmd_hdr.reserved);
		if (udata->inlen < min_ucmd_sz)
			return ERR_PTR(-EOPNOTSUPP);

		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
		if (err)
			return ERR_PTR(err);

		/* currently supports only one counters data */
		if (ucmd_hdr.ncounters_data > 1)
			return ERR_PTR(-EINVAL);

		required_ucmd_sz = min_ucmd_sz +
				   sizeof(struct mlx5_ib_flow_counters_data) *
				   ucmd_hdr.ncounters_data;
		/* Any trailing bytes beyond what we understand must be zero. */
		if (udata->inlen > required_ucmd_sz &&
		    !ib_is_udata_cleared(udata, required_ucmd_sz,
					 udata->inlen - required_ucmd_sz))
			return ERR_PTR(-EOPNOTSUPP);

		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
		if (!ucmd)
			return ERR_PTR(-ENOMEM);

		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
		if (err)
			goto free_ucmd;
	}

	/* NOTE(review): out-of-range priority reports -ENOMEM, not -EINVAL;
	 * this is existing ABI — do not change without auditing userspace.
	 */
	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
		err = -ENOMEM;
		goto free_ucmd;
	}

	if (domain != IB_FLOW_DOMAIN_USER ||
	    flow_attr->port > dev->num_ports ||
	    (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
				  IB_FLOW_ATTR_FLAGS_EGRESS))) {
		err = -EINVAL;
		goto free_ucmd;
	}

	/* Leftover (default) rules only make sense on the RX side. */
	if (is_egress &&
	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
		err = -EINVAL;
		goto free_ucmd;
	}

	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
	if (!dst) {
		err = -ENOMEM;
		goto free_ucmd;
	}

	/* flow_db->lock serializes table lookup/creation and refcounting. */
	mutex_lock(&dev->flow_db->lock);

	ft_prio = get_flow_table(dev, flow_attr,
				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto unlock;
	}
	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
		/* Sniffer needs the TX table as well as the RX one. */
		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
		if (IS_ERR(ft_prio_tx)) {
			err = PTR_ERR(ft_prio_tx);
			ft_prio_tx = NULL;
			goto destroy_ft;
		}
	}

	if (is_egress) {
		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	} else {
		/* RX rules steer into the QP's TIR (RSS or raw-packet RQ). */
		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		if (mqp->is_rss)
			dst->tir_num = mqp->rss_qp.tirn;
		else
			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
	}

	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
				       mqp->underlay_qpn :
				       0;
		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
					    underlay_qpn, ucmd);
	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
		handler = create_leftovers_rule(dev, ft_prio, flow_attr,
						dst);
	} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
	} else {
		err = -EINVAL;
		goto destroy_ft;
	}

	if (IS_ERR(handler)) {
		err = PTR_ERR(handler);
		handler = NULL;
		goto destroy_ft;
	}

	mutex_unlock(&dev->flow_db->lock);
	kfree(dst);
	kfree(ucmd);

	return &handler->ibflow;

destroy_ft:
	put_flow_table(dev, ft_prio, false);
	if (ft_prio_tx)
		put_flow_table(dev, ft_prio_tx, false);
unlock:
	mutex_unlock(&dev->flow_db->lock);
	kfree(dst);
free_ucmd:
	kfree(ucmd);
	return ERR_PTR(err);
}

/* Resolve (and lazily create) the flow-table priority slot for a raw
 * flow matcher.  Table size and tunnel-offload flags come from the
 * per-namespace firmware caps; the slot returned depends on the
 * matcher's namespace type (bypass/egress/FDB/RDMA RX/RDMA TX).
 * Must be called with dev->flow_db->lock held (callers hold it).
 */
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev,
		struct mlx5_ib_flow_matcher *fs_matcher,
		bool mcast)
{
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_ib_flow_prio *prio = NULL;
	int max_table_size = 0;
	bool esw_encap;
	u32 flags = 0;
	int priority;

	if (mcast)
		priority = MLX5_IB_FLOW_MCAST_PRIO;
	else
		priority = ib_prio_to_core_prio(fs_matcher->priority, false);

	/* When e-switch encap is enabled, the NIC tables must not request
	 * tunnel decap/reformat; the FDB case is the inverse.
	 */
	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
		    DEVLINK_ESWITCH_ENCAP_MODE_NONE;
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
		max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
							       log_max_ft_size));
		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
					      reformat_l3_tunnel_to_l2) &&
		    !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
		max_table_size = BIT(
			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
		max_table_size = BIT(
			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
		    esw_encap)
			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		/* FDB rules always go through the bypass path priority. */
		priority = FDB_BYPASS_PATH;
	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
		max_table_size =
			BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
						       log_max_ft_size));
		priority = fs_matcher->priority;
	} else if
 (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
		max_table_size =
			BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
						       log_max_ft_size));
		priority = fs_matcher->priority;
	}

	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);

	ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

	/* Pick the prio slot matching the namespace type. */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
		prio = &dev->flow_db->prios[priority];
	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
		prio = &dev->flow_db->egress_prios[priority];
	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
		prio = &dev->flow_db->fdb;
	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
		prio = &dev->flow_db->rdma_rx[priority];
	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
		prio = &dev->flow_db->rdma_tx[priority];

	if (!prio)
		return ERR_PTR(-EINVAL);

	/* Table already created for this slot — reuse it. */
	if (prio->flow_table)
		return prio;

	return _get_prio(ns, prio, priority, max_table_size,
			 MLX5_FS_MAX_TYPES, flags);
}

/* Install one raw (devx-style) flow rule built from a user-provided
 * match value and the matcher's mask, with up to @dst_num destinations.
 * On success takes a reference on @ft_prio; on failure frees the
 * handler and returns ERR_PTR.
 */
static struct mlx5_ib_flow_handler *
_create_raw_flow_rule(struct mlx5_ib_dev *dev,
		      struct mlx5_ib_flow_prio *ft_prio,
		      struct mlx5_flow_destination *dst,
		      struct mlx5_ib_flow_matcher *fs_matcher,
		      struct mlx5_flow_context *flow_context,
		      struct mlx5_flow_act *flow_act,
		      void *cmd_in, int inlen,
		      int dst_num)
{
	struct mlx5_ib_flow_handler *handler;
	struct mlx5_flow_spec *spec;
	struct mlx5_flow_table *ft = ft_prio->flow_table;
	int err = 0;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
	if (!handler || !spec) {
		err = -ENOMEM;
		goto free;
	}

	INIT_LIST_HEAD(&handler->list);

	/* Match value comes from the user; the criteria (mask) from the
	 * pre-validated matcher object.
	 */
	memcpy(spec->match_value, cmd_in, inlen);
	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
	       fs_matcher->mask_len);
	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
	spec->flow_context = *flow_context;

	handler->rule = mlx5_add_flow_rules(ft, spec,
					    flow_act, dst, dst_num);

	if (IS_ERR(handler->rule)) {
		err = PTR_ERR(handler->rule);
		goto free;
	}

	ft_prio->refcount++;
	handler->prio = ft_prio;
	handler->dev = dev;
	ft_prio->flow_table = ft;

free:
	if (err)
		kfree(handler);
	kvfree(spec);
	return err ? ERR_PTR(err) : handler;
}

/* Return true if the user's match value+mask targets multicast traffic
 * (either an L2 multicast dmac or an IPv4 multicast destination), so the
 * rule can be placed in the multicast priority.
 */
static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
				void *match_v)
{
	void *match_c;
	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
	void *dmac, *dmac_mask;
	void *ipv4, *ipv4_mask;

	if (!(fs_matcher->match_criteria_enable &
	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
		return false;

	match_c = fs_matcher->matcher_mask.match_params;
	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
					   outer_headers);
	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
					   outer_headers);

	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
			    dmac_47_16);
	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
				 dmac_47_16);

	if (is_multicast_ether_addr(dmac) &&
	    is_multicast_ether_addr(dmac_mask))
		return true;

	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);

	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);

	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
		return true;

	return false;
}

/* Add a raw flow-steering rule: locate/create the flow table for the
 * matcher, translate the requested destination (TIR / flow table /
 * port) and optional counter into mlx5_flow_destination entries, then
 * install the rule via _create_raw_flow_rule().
 */
static struct mlx5_ib_flow_handler *raw_fs_rule_add(
	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
{
	struct mlx5_flow_destination *dst;
	struct mlx5_ib_flow_prio *ft_prio;
	struct mlx5_ib_flow_handler *handler;
	int dst_num = 0;
	bool mcast;
	int err;

	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
		return ERR_PTR(-EOPNOTSUPP);

	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
		return ERR_PTR(-ENOMEM);

	/* At most two destinations: the forward target and a counter. */
	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
	mutex_lock(&dev->flow_db->lock);

	ft_prio = _get_flow_table(dev, fs_matcher, mcast);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto unlock;
	}

	if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
		dst[dst_num].type = dest_type;
		dst[dst_num++].tir_num = dest_id;
		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
		dst[dst_num++].ft_num = dest_id;
		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
	}

	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dst[dst_num].counter_id = counter_id;
		dst_num++;
	}
1511 + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, 1512 + flow_context, flow_act, 1513 + cmd_in, inlen, dst_num); 1514 + 1515 + if (IS_ERR(handler)) { 1516 + err = PTR_ERR(handler); 1517 + goto destroy_ft; 1518 + } 1519 + 1520 + mutex_unlock(&dev->flow_db->lock); 1521 + atomic_inc(&fs_matcher->usecnt); 1522 + handler->flow_matcher = fs_matcher; 1523 + 1524 + kfree(dst); 1525 + 1526 + return handler; 1527 + 1528 + destroy_ft: 1529 + put_flow_table(dev, ft_prio, false); 1530 + unlock: 1531 + mutex_unlock(&dev->flow_db->lock); 1532 + kfree(dst); 1533 + 1534 + return ERR_PTR(err); 1535 + } 1536 + 1537 + static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) 1538 + { 1539 + u32 flags = 0; 1540 + 1541 + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) 1542 + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; 1543 + 1544 + return flags; 1545 + } 1546 + 1547 + #define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ 1548 + MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA 1549 + static struct ib_flow_action * 1550 + mlx5_ib_create_flow_action_esp(struct ib_device *device, 1551 + const struct ib_flow_action_attrs_esp *attr, 1552 + struct uverbs_attr_bundle *attrs) 1553 + { 1554 + struct mlx5_ib_dev *mdev = to_mdev(device); 1555 + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; 1556 + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; 1557 + struct mlx5_ib_flow_action *action; 1558 + u64 action_flags; 1559 + u64 flags; 1560 + int err = 0; 1561 + 1562 + err = uverbs_get_flags64( 1563 + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, 1564 + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); 1565 + if (err) 1566 + return ERR_PTR(err); 1567 + 1568 + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); 1569 + 1570 + /* We current only support a subset of the standard features. Only a 1571 + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn 1572 + * (with overlap). 
Full offload mode isn't supported. 1573 + */ 1574 + if (!attr->keymat || attr->replay || attr->encap || 1575 + attr->spi || attr->seq || attr->tfc_pad || 1576 + attr->hard_limit_pkts || 1577 + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | 1578 + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) 1579 + return ERR_PTR(-EOPNOTSUPP); 1580 + 1581 + if (attr->keymat->protocol != 1582 + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) 1583 + return ERR_PTR(-EOPNOTSUPP); 1584 + 1585 + aes_gcm = &attr->keymat->keymat.aes_gcm; 1586 + 1587 + if (aes_gcm->icv_len != 16 || 1588 + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) 1589 + return ERR_PTR(-EOPNOTSUPP); 1590 + 1591 + action = kmalloc(sizeof(*action), GFP_KERNEL); 1592 + if (!action) 1593 + return ERR_PTR(-ENOMEM); 1594 + 1595 + action->esp_aes_gcm.ib_flags = attr->flags; 1596 + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, 1597 + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); 1598 + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; 1599 + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, 1600 + sizeof(accel_attrs.keymat.aes_gcm.salt)); 1601 + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, 1602 + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); 1603 + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; 1604 + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; 1605 + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; 1606 + 1607 + accel_attrs.esn = attr->esn; 1608 + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) 1609 + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; 1610 + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) 1611 + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; 1612 + 1613 + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) 1614 + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; 1615 + 1616 + action->esp_aes_gcm.ctx = 1617 + mlx5_accel_esp_create_xfrm(mdev->mdev, 
&accel_attrs, flags); 1618 + if (IS_ERR(action->esp_aes_gcm.ctx)) { 1619 + err = PTR_ERR(action->esp_aes_gcm.ctx); 1620 + goto err_parse; 1621 + } 1622 + 1623 + action->esp_aes_gcm.ib_flags = attr->flags; 1624 + 1625 + return &action->ib_action; 1626 + 1627 + err_parse: 1628 + kfree(action); 1629 + return ERR_PTR(err); 1630 + } 1631 + 1632 + static int 1633 + mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, 1634 + const struct ib_flow_action_attrs_esp *attr, 1635 + struct uverbs_attr_bundle *attrs) 1636 + { 1637 + struct mlx5_ib_flow_action *maction = to_mflow_act(action); 1638 + struct mlx5_accel_esp_xfrm_attrs accel_attrs; 1639 + int err = 0; 1640 + 1641 + if (attr->keymat || attr->replay || attr->encap || 1642 + attr->spi || attr->seq || attr->tfc_pad || 1643 + attr->hard_limit_pkts || 1644 + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | 1645 + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | 1646 + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) 1647 + return -EOPNOTSUPP; 1648 + 1649 + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can 1650 + * be modified. 
	 */
	if (!(maction->esp_aes_gcm.ib_flags &
	      IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
	    attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
			   IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
		return -EINVAL;

	memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
	       sizeof(accel_attrs));

	accel_attrs.esn = attr->esn;
	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
	else
		accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;

	err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
					 &accel_attrs);
	if (err)
		return err;

	/* Mirror the new-window flag into the cached ib_flags. */
	maction->esp_aes_gcm.ib_flags &=
		~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
	maction->esp_aes_gcm.ib_flags |=
		attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;

	return 0;
}

/* Release firmware resources held by a raw (devx) flow action. */
static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
	switch (maction->flow_action_raw.sub_type) {
	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
					   maction->flow_action_raw.modify_hdr);
		break;
	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
					     maction->flow_action_raw.pkt_reformat);
		break;
	case MLX5_IB_FLOW_ACTION_DECAP:
		/* Decap holds no firmware object; nothing to release. */
		break;
	default:
		break;
	}
}

/* ib_device verbs entry point: destroy any kind of flow action. */
static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
{
	struct mlx5_ib_flow_action *maction = to_mflow_act(action);

	switch (action->type) {
	case IB_FLOW_ACTION_ESP:
		/*
		 * We only support aes_gcm by now, so we implicitly know this is
		 * the underline crypto.
		 */
		mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
		break;
	case IB_FLOW_ACTION_UNSPECIFIED:
		destroy_flow_action_raw(maction);
		break;
	default:
		WARN_ON(true);
		break;
	}

	kfree(maction);
	return 0;
}

/* Map a uapi flow-table type onto the core flow namespace enum. */
static int
mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
			     enum mlx5_flow_namespace_type *namespace)
{
	switch (table_type) {
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
		*namespace = MLX5_FLOW_NAMESPACE_FDB;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* uverbs attribute specs for the per-flow-type inline data: NORMAL
 * carries a u16 priority; the other types carry no data.
 */
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
	[MLX5_IB_FLOW_TYPE_NORMAL] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		.u.ptr = {
			.len = sizeof(u16), /* data is priority */
			.min_len = sizeof(u16),
		}
	},
	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
};

/* Decode a DEVX object's destroy-command header to decide whether it can
 * serve as a flow destination (TIR or flow table), returning its id/type.
 */
static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
{
	struct devx_obj *devx_obj = obj;
	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);

	switch (opcode) {
	case MLX5_CMD_OP_DESTROY_TIR:
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
				    obj_id);
		return true;

	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
				    table_id);
		return true;
	default:
		return false;
	}
}

/* Validate and resolve the destination attributes of a CREATE_FLOW
 * request: mutually-exclusive DEVX object / QP / default-miss / drop
 * combinations, per-namespace restrictions, and the resulting
 * (dest_id, dest_type, qp, flags) tuple.
 */
static int get_dests(struct uverbs_attr_bundle *attrs,
		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
		     int *dest_type, struct ib_qp **qp, u32 *flags)
{
	bool dest_devx, dest_qp;
	void *devx_obj;
	int err;

	dest_devx = uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
	dest_qp = uverbs_attr_is_valid(attrs,
				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);

	*flags = 0;
	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
	if (err)
		return err;

	/* Both flags are not allowed */
	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		return -EINVAL;

	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
		if (dest_devx && (dest_qp || *flags))
			return -EINVAL;
		else if (dest_qp && *flags)
			return -EINVAL;
	}

	/* Allow only DEVX object, drop as dest for FDB */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
	     (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
		return -EINVAL;

	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

	*qp = NULL;
	if (dest_devx) {
		devx_obj =
			uverbs_attr_get_obj(attrs,
					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);

		/* Verify that the given DEVX object is a flow
		 * steering destination.
		 */
		if (!is_flow_dest(devx_obj, dest_id, dest_type))
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
		struct mlx5_ib_qp *mqp;

		*qp = uverbs_attr_get_obj(attrs,
					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
		if (IS_ERR(*qp))
			return PTR_ERR(*qp);

		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
			return -EINVAL;

		/* A QP destination resolves to its TIR number. */
		mqp = to_mqp(*qp);
		if (mqp->is_rss)
			*dest_id = mqp->rss_qp.tirn;
		else
			*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		   fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	/* A TIR destination makes no sense on the egress/TX side. */
	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
		return -EINVAL;

	return 0;
}

/* Check that a DEVX object is a flow counter and, if so, return its
 * counter id adjusted by @offset (bounds-checked against the bulk size).
 */
static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
{
	struct devx_obj *devx_obj = obj;
	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);

	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {

		if (offset && offset >= devx_obj->flow_counter_bulk_size)
			return false;

		*counter_id = MLX5_GET(dealloc_flow_counter_in,
				       devx_obj->dinbox,
				       flow_counter_id);
		*counter_id += offset;
		return true;
	}

	return false;
}

#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
/* ioctl handler for MLX5_IB_METHOD_CREATE_FLOW: gather destination,
 * counter, flow actions and flow tag from the attribute bundle, then
 * install the raw rule via raw_fs_rule_add().  Requires CAP_NET_RAW.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
	struct uverbs_attr_bundle *attrs)
{
	struct mlx5_flow_context flow_context = {.flow_tag =
		MLX5_FS_DEFAULT_FLOW_TAG};
	u32 *offset_attr, offset = 0, counter_id = 0;
	int dest_id, dest_type = -1, inlen, len, ret, i;
	struct mlx5_ib_flow_handler *flow_handler;
	struct mlx5_ib_flow_matcher *fs_matcher;
	struct ib_uobject **arr_flow_actions;
	struct ib_uflow_resources *uflow_res;
	struct mlx5_flow_act flow_act = {};
	struct ib_qp *qp = NULL;
	void *devx_obj, *cmd_in;
	struct ib_uobject *uobj;
	struct mlx5_ib_dev *dev;
	u32 flags;

	if (!capable(CAP_NET_RAW))
		return -EPERM;

	fs_matcher = uverbs_attr_get_obj(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
	dev = mlx5_udata_to_mdev(&attrs->driver_udata);

	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
		return -EINVAL;

	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;

	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;

	/* Optional counter attachment: a single DEVX counter object with
	 * an optional index into its bulk.
	 */
	len = uverbs_attr_get_uobjs_arr(attrs,
		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
	if (len) {
		devx_obj = arr_flow_actions[0]->object;

		if (uverbs_attr_is_valid(attrs,
			MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {

			int num_offsets = uverbs_attr_ptr_get_array_size(
				attrs,
				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
				sizeof(u32));

			if (num_offsets != 1)
				return -EINVAL;

			offset_attr = uverbs_attr_get_alloced_ptr(
				attrs,
				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
			offset = *offset_attr;
		}

		if (!is_flow_counter(devx_obj, offset, &counter_id))
			return -EINVAL;

		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	}

	cmd_in = uverbs_attr_get_alloced_ptr(
		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
	inlen = uverbs_attr_get_len(attrs,
				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);

	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
	if (!uflow_res)
		return -ENOMEM;

	len = uverbs_attr_get_uobjs_arr(attrs,
		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
	for (i = 0; i < len; i++) {
		struct mlx5_ib_flow_action *maction =
			to_mflow_act(arr_flow_actions[i]->object);

		ret = parse_flow_flow_action(maction, false, &flow_act);
		if (ret)
			goto err_out;
		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
				   arr_flow_actions[i]->object);
	}

	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
	if (!ret) {
		/* Flow tags are 24-bit values in hardware. */
		if (flow_context.flow_tag >= BIT(24)) {
			ret = -EINVAL;
			goto err_out;
		}
		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
	}

	flow_handler =
		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
				counter_id, cmd_in, inlen, dest_id, dest_type);
	if (IS_ERR(flow_handler)) {
		ret = PTR_ERR(flow_handler);
		goto err_out;
	}

	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);

	return 0;
err_out:
	ib_uverbs_flow_resources_free(uflow_res);
	return ret;
}

/* uobject cleanup for a flow matcher: only destroyable when unused. */
static int flow_matcher_cleanup(struct ib_uobject *uobject,
				enum rdma_remove_reason why,
				struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_flow_matcher *obj = uobject->object;
	int ret;

	/* Refuse destruction while rules still reference this matcher. */
	ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
	if (ret)
		return ret;

	kfree(obj);
	return 0;
}

/* Determine the flow namespace for a new matcher from either the newer
 * FT_TYPE attribute or the legacy FLOW_FLAGS attribute (kept for
 * backward compatibility); defaults to the bypass namespace.
 */
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
			      struct mlx5_ib_flow_matcher *obj)
{
	enum mlx5_ib_uapi_flow_table_type ft_type =
		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
	u32 flags;
	int err;

	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
	 * users should switch to it. We leave this to not break userspace
	 */
	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
		return -EINVAL;

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
		err = uverbs_get_const(&ft_type, attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
		if (err)
			return err;

		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
		if (err)
			return err;

		return 0;
	}

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
		err = uverbs_get_flags32(&flags, attrs,
					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
					 IB_FLOW_ATTR_FLAGS_EGRESS);
		if (err)
			return err;

		if (flags) {
			/* Legacy EGRESS flag maps to the NIC TX namespace. */
			mlx5_ib_ft_type_to_namespace(
				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
				&obj->ns_type);
			return 0;
		}
	}

	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;

	return 0;
}

/* ioctl handler for MLX5_IB_METHOD_FLOW_MATCHER_CREATE: build a matcher
 * object from the user's match mask, flow type (with optional priority
 * for NORMAL), match-criteria-enable bits and namespace selection.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj = uverbs_attr_get_uobject(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
	struct mlx5_ib_flow_matcher *obj;
	int err;

	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	obj->mask_len = uverbs_attr_get_len(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
	err = uverbs_copy_from(&obj->matcher_mask,
			       attrs,
			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
	if (err)
		goto end;

	obj->flow_type = uverbs_attr_get_enum_id(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);

	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
		/* NORMAL carries a u16 priority as its inline data. */
		err = uverbs_copy_from(&obj->priority,
				       attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
		if (err)
			goto end;
	}

	err = uverbs_copy_from(&obj->match_criteria_enable,
			       attrs,
			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
	if (err)
		goto end;

	err = mlx5_ib_matcher_ns(attrs, obj);
	if (err)
		goto end;

	uobj->object = obj;
	obj->mdev = dev->mdev;
	atomic_set(&obj->usecnt, 0);
	return 0;

end:
	kfree(obj);
	return err;
}

/* Allocate a modify-header firmware object and wrap it in an
 * IB_FLOW_ACTION_UNSPECIFIED flow action.
 */
static struct ib_flow_action *
mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
			     enum mlx5_ib_uapi_flow_table_type ft_type,
			     u8 num_actions, void *in)
{
	enum mlx5_flow_namespace_type namespace;
	struct mlx5_ib_flow_action *maction;
	int ret;

	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
	if (ret)
		return ERR_PTR(-EINVAL);

	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
	if (!maction)
		return ERR_PTR(-ENOMEM);

	maction->flow_action_raw.modify_hdr =
		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);

	if
(IS_ERR(maction->flow_action_raw.modify_hdr)) { 2149 + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); 2150 + kfree(maction); 2151 + return ERR_PTR(ret); 2152 + } 2153 + maction->flow_action_raw.sub_type = 2154 + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; 2155 + maction->flow_action_raw.dev = dev; 2156 + 2157 + return &maction->ib_action; 2158 + } 2159 + 2160 + static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) 2161 + { 2162 + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 2163 + max_modify_header_actions) || 2164 + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, 2165 + max_modify_header_actions) || 2166 + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, 2167 + max_modify_header_actions); 2168 + } 2169 + 2170 + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( 2171 + struct uverbs_attr_bundle *attrs) 2172 + { 2173 + struct ib_uobject *uobj = uverbs_attr_get_uobject( 2174 + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); 2175 + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); 2176 + enum mlx5_ib_uapi_flow_table_type ft_type; 2177 + struct ib_flow_action *action; 2178 + int num_actions; 2179 + void *in; 2180 + int ret; 2181 + 2182 + if (!mlx5_ib_modify_header_supported(mdev)) 2183 + return -EOPNOTSUPP; 2184 + 2185 + in = uverbs_attr_get_alloced_ptr(attrs, 2186 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); 2187 + 2188 + num_actions = uverbs_attr_ptr_get_array_size( 2189 + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 2190 + MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); 2191 + if (num_actions < 0) 2192 + return num_actions; 2193 + 2194 + ret = uverbs_get_const(&ft_type, attrs, 2195 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); 2196 + if (ret) 2197 + return ret; 2198 + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); 2199 + if (IS_ERR(action)) 2200 + return PTR_ERR(action); 2201 + 2202 + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, 2203 + IB_FLOW_ACTION_UNSPECIFIED); 2204 + 2205 + 
return 0; 2206 + } 2207 + 2208 + static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, 2209 + u8 packet_reformat_type, 2210 + u8 ft_type) 2211 + { 2212 + switch (packet_reformat_type) { 2213 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 2214 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 2215 + return MLX5_CAP_FLOWTABLE(ibdev->mdev, 2216 + encap_general_header); 2217 + break; 2218 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 2219 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 2220 + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, 2221 + reformat_l2_to_l3_tunnel); 2222 + break; 2223 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 2224 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 2225 + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, 2226 + reformat_l3_tunnel_to_l2); 2227 + break; 2228 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: 2229 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 2230 + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); 2231 + break; 2232 + default: 2233 + break; 2234 + } 2235 + 2236 + return false; 2237 + } 2238 + 2239 + static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) 2240 + { 2241 + switch (dv_prt) { 2242 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 2243 + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; 2244 + break; 2245 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 2246 + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; 2247 + break; 2248 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 2249 + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; 2250 + break; 2251 + default: 2252 + return -EINVAL; 2253 + } 2254 + 2255 + return 0; 2256 + } 2257 + 2258 + static int mlx5_ib_flow_action_create_packet_reformat_ctx( 2259 + struct mlx5_ib_dev *dev, 2260 + struct mlx5_ib_flow_action *maction, 2261 + u8 ft_type, u8 
dv_prt, 2262 + void *in, size_t len) 2263 + { 2264 + enum mlx5_flow_namespace_type namespace; 2265 + u8 prm_prt; 2266 + int ret; 2267 + 2268 + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); 2269 + if (ret) 2270 + return ret; 2271 + 2272 + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); 2273 + if (ret) 2274 + return ret; 2275 + 2276 + maction->flow_action_raw.pkt_reformat = 2277 + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, 2278 + in, namespace); 2279 + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { 2280 + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); 2281 + return ret; 2282 + } 2283 + 2284 + maction->flow_action_raw.sub_type = 2285 + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; 2286 + maction->flow_action_raw.dev = dev; 2287 + 2288 + return 0; 2289 + } 2290 + 2291 + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( 2292 + struct uverbs_attr_bundle *attrs) 2293 + { 2294 + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, 2295 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); 2296 + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); 2297 + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; 2298 + enum mlx5_ib_uapi_flow_table_type ft_type; 2299 + struct mlx5_ib_flow_action *maction; 2300 + int ret; 2301 + 2302 + ret = uverbs_get_const(&ft_type, attrs, 2303 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); 2304 + if (ret) 2305 + return ret; 2306 + 2307 + ret = uverbs_get_const(&dv_prt, attrs, 2308 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); 2309 + if (ret) 2310 + return ret; 2311 + 2312 + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) 2313 + return -EOPNOTSUPP; 2314 + 2315 + maction = kzalloc(sizeof(*maction), GFP_KERNEL); 2316 + if (!maction) 2317 + return -ENOMEM; 2318 + 2319 + if (dv_prt == 2320 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { 2321 + maction->flow_action_raw.sub_type = 2322 + MLX5_IB_FLOW_ACTION_DECAP; 2323 + 
maction->flow_action_raw.dev = mdev; 2324 + } else { 2325 + void *in; 2326 + int len; 2327 + 2328 + in = uverbs_attr_get_alloced_ptr(attrs, 2329 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 2330 + if (IS_ERR(in)) { 2331 + ret = PTR_ERR(in); 2332 + goto free_maction; 2333 + } 2334 + 2335 + len = uverbs_attr_get_len(attrs, 2336 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 2337 + 2338 + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, 2339 + maction, ft_type, dv_prt, in, len); 2340 + if (ret) 2341 + goto free_maction; 2342 + } 2343 + 2344 + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, 2345 + IB_FLOW_ACTION_UNSPECIFIED); 2346 + return 0; 2347 + 2348 + free_maction: 2349 + kfree(maction); 2350 + return ret; 2351 + } 2352 + 2353 + DECLARE_UVERBS_NAMED_METHOD( 2354 + MLX5_IB_METHOD_CREATE_FLOW, 2355 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, 2356 + UVERBS_OBJECT_FLOW, 2357 + UVERBS_ACCESS_NEW, 2358 + UA_MANDATORY), 2359 + UVERBS_ATTR_PTR_IN( 2360 + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, 2361 + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), 2362 + UA_MANDATORY, 2363 + UA_ALLOC_AND_COPY), 2364 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, 2365 + MLX5_IB_OBJECT_FLOW_MATCHER, 2366 + UVERBS_ACCESS_READ, 2367 + UA_MANDATORY), 2368 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, 2369 + UVERBS_OBJECT_QP, 2370 + UVERBS_ACCESS_READ), 2371 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 2372 + MLX5_IB_OBJECT_DEVX_OBJ, 2373 + UVERBS_ACCESS_READ), 2374 + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, 2375 + UVERBS_OBJECT_FLOW_ACTION, 2376 + UVERBS_ACCESS_READ, 1, 2377 + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, 2378 + UA_OPTIONAL), 2379 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, 2380 + UVERBS_ATTR_TYPE(u32), 2381 + UA_OPTIONAL), 2382 + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, 2383 + MLX5_IB_OBJECT_DEVX_OBJ, 2384 + UVERBS_ACCESS_READ, 1, 1, 2385 + UA_OPTIONAL), 
2386 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, 2387 + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), 2388 + UA_OPTIONAL, 2389 + UA_ALLOC_AND_COPY), 2390 + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, 2391 + enum mlx5_ib_create_flow_flags, 2392 + UA_OPTIONAL)); 2393 + 2394 + DECLARE_UVERBS_NAMED_METHOD_DESTROY( 2395 + MLX5_IB_METHOD_DESTROY_FLOW, 2396 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, 2397 + UVERBS_OBJECT_FLOW, 2398 + UVERBS_ACCESS_DESTROY, 2399 + UA_MANDATORY)); 2400 + 2401 + ADD_UVERBS_METHODS(mlx5_ib_fs, 2402 + UVERBS_OBJECT_FLOW, 2403 + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), 2404 + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); 2405 + 2406 + DECLARE_UVERBS_NAMED_METHOD( 2407 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, 2408 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, 2409 + UVERBS_OBJECT_FLOW_ACTION, 2410 + UVERBS_ACCESS_NEW, 2411 + UA_MANDATORY), 2412 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 2413 + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( 2414 + set_add_copy_action_in_auto)), 2415 + UA_MANDATORY, 2416 + UA_ALLOC_AND_COPY), 2417 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, 2418 + enum mlx5_ib_uapi_flow_table_type, 2419 + UA_MANDATORY)); 2420 + 2421 + DECLARE_UVERBS_NAMED_METHOD( 2422 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 2423 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, 2424 + UVERBS_OBJECT_FLOW_ACTION, 2425 + UVERBS_ACCESS_NEW, 2426 + UA_MANDATORY), 2427 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, 2428 + UVERBS_ATTR_MIN_SIZE(1), 2429 + UA_ALLOC_AND_COPY, 2430 + UA_OPTIONAL), 2431 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, 2432 + enum mlx5_ib_uapi_flow_action_packet_reformat_type, 2433 + UA_MANDATORY), 2434 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, 2435 + enum mlx5_ib_uapi_flow_table_type, 2436 + UA_MANDATORY)); 2437 + 2438 + ADD_UVERBS_METHODS( 
2439 + mlx5_ib_flow_actions, 2440 + UVERBS_OBJECT_FLOW_ACTION, 2441 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), 2442 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); 2443 + 2444 + DECLARE_UVERBS_NAMED_METHOD( 2445 + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, 2446 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, 2447 + MLX5_IB_OBJECT_FLOW_MATCHER, 2448 + UVERBS_ACCESS_NEW, 2449 + UA_MANDATORY), 2450 + UVERBS_ATTR_PTR_IN( 2451 + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, 2452 + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), 2453 + UA_MANDATORY), 2454 + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, 2455 + mlx5_ib_flow_type, 2456 + UA_MANDATORY), 2457 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 2458 + UVERBS_ATTR_TYPE(u8), 2459 + UA_MANDATORY), 2460 + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 2461 + enum ib_flow_flags, 2462 + UA_OPTIONAL), 2463 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, 2464 + enum mlx5_ib_uapi_flow_table_type, 2465 + UA_OPTIONAL)); 2466 + 2467 + DECLARE_UVERBS_NAMED_METHOD_DESTROY( 2468 + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, 2469 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, 2470 + MLX5_IB_OBJECT_FLOW_MATCHER, 2471 + UVERBS_ACCESS_DESTROY, 2472 + UA_MANDATORY)); 2473 + 2474 + DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, 2475 + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), 2476 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), 2477 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); 2478 + 2479 + const struct uapi_definition mlx5_ib_flow_defs[] = { 2480 + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( 2481 + MLX5_IB_OBJECT_FLOW_MATCHER), 2482 + UAPI_DEF_CHAIN_OBJ_TREE( 2483 + UVERBS_OBJECT_FLOW, 2484 + &mlx5_ib_fs), 2485 + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, 2486 + &mlx5_ib_flow_actions), 2487 + {}, 2488 + }; 2489 + 2490 + static const struct ib_device_ops flow_ops = { 2491 + .create_flow = 
mlx5_ib_create_flow, 2492 + .destroy_flow = mlx5_ib_destroy_flow, 2493 + .destroy_flow_action = mlx5_ib_destroy_flow_action, 2494 + }; 2495 + 2496 + static const struct ib_device_ops flow_ipsec_ops = { 2497 + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, 2498 + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, 2499 + }; 2500 + 2501 + int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) 2502 + { 2503 + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); 2504 + 2505 + if (!dev->flow_db) 2506 + return -ENOMEM; 2507 + 2508 + mutex_init(&dev->flow_db->lock); 2509 + 2510 + ib_set_device_ops(&dev->ib_dev, &flow_ops); 2511 + if (mlx5_accel_ipsec_device_caps(dev->mdev) & 2512 + MLX5_ACCEL_IPSEC_CAP_DEVICE) 2513 + ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); 2514 + 2515 + return 0; 2516 + }
+29
drivers/infiniband/hw/mlx5/fs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* 3 + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #ifndef _MLX5_IB_FS_H 7 + #define _MLX5_IB_FS_H 8 + 9 + #include "mlx5_ib.h" 10 + 11 + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 12 + int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); 13 + #else 14 + static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) 15 + { 16 + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); 17 + 18 + if (!dev->flow_db) 19 + return -ENOMEM; 20 + 21 + mutex_init(&dev->flow_db->lock); 22 + return 0; 23 + } 24 + #endif 25 + static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) 26 + { 27 + kfree(dev->flow_db); 28 + } 29 + #endif /* _MLX5_IB_FS_H */
+374 -2880
drivers/infiniband/hw/mlx5/main.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 1 2 /* 2 - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 3 + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 
31 4 */ 32 5 33 6 #include <linux/debugfs.h> ··· 32 59 #include "mlx5_ib.h" 33 60 #include "ib_rep.h" 34 61 #include "cmd.h" 62 + #include "devx.h" 63 + #include "fs.h" 35 64 #include "srq.h" 36 65 #include "qp.h" 37 66 #include "wr.h" 38 - #include <linux/mlx5/fs_helpers.h> 67 + #include "restrack.h" 68 + #include "counters.h" 39 69 #include <linux/mlx5/accel.h> 40 70 #include <rdma/uverbs_std_types.h> 41 71 #include <rdma/mlx5_user_ioctl_verbs.h> ··· 287 311 *native_port_num = 1; 288 312 289 313 port = &ibdev->port[ib_port_num - 1]; 290 - if (!port) 291 - return NULL; 292 - 293 314 spin_lock(&port->mp.mpi_lock); 294 315 mpi = ibdev->port[ib_port_num - 1].mp.mpi; 295 316 if (mpi && !mpi->unaffiliate) { ··· 1738 1765 mlx5_ib_disable_lb(dev, true, false); 1739 1766 } 1740 1767 1768 + static int set_ucontext_resp(struct ib_ucontext *uctx, 1769 + struct mlx5_ib_alloc_ucontext_resp *resp) 1770 + { 1771 + struct ib_device *ibdev = uctx->device; 1772 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 1773 + struct mlx5_ib_ucontext *context = to_mucontext(uctx); 1774 + struct mlx5_bfreg_info *bfregi = &context->bfregi; 1775 + int err; 1776 + 1777 + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1778 + err = mlx5_cmd_dump_fill_mkey(dev->mdev, 1779 + &resp->dump_fill_mkey); 1780 + if (err) 1781 + return err; 1782 + resp->comp_mask |= 1783 + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; 1784 + } 1785 + 1786 + resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 1787 + if (dev->wc_support) 1788 + resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, 1789 + log_bf_reg_size); 1790 + resp->cache_line_size = cache_line_size(); 1791 + resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 1792 + resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 1793 + resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1794 + resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1795 + resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, 
log_max_srq_sz); 1796 + resp->cqe_version = context->cqe_version; 1797 + resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1798 + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; 1799 + resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1800 + MLX5_CAP_GEN(dev->mdev, 1801 + num_of_uars_per_page) : 1; 1802 + 1803 + if (mlx5_accel_ipsec_device_caps(dev->mdev) & 1804 + MLX5_ACCEL_IPSEC_CAP_DEVICE) { 1805 + if (mlx5_get_flow_namespace(dev->mdev, 1806 + MLX5_FLOW_NAMESPACE_EGRESS)) 1807 + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; 1808 + if (mlx5_accel_ipsec_device_caps(dev->mdev) & 1809 + MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) 1810 + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; 1811 + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) 1812 + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; 1813 + if (mlx5_accel_ipsec_device_caps(dev->mdev) & 1814 + MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) 1815 + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; 1816 + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ 1817 + } 1818 + 1819 + resp->tot_bfregs = bfregi->lib_uar_dyn ? 
0 : 1820 + bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; 1821 + resp->num_ports = dev->num_ports; 1822 + resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | 1823 + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; 1824 + 1825 + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { 1826 + mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); 1827 + resp->eth_min_inline++; 1828 + } 1829 + 1830 + if (dev->mdev->clock_info) 1831 + resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); 1832 + 1833 + /* 1834 + * We don't want to expose information from the PCI bar that is located 1835 + * after 4096 bytes, so if the arch only supports larger pages, let's 1836 + * pretend we don't support reading the HCA's core clock. This is also 1837 + * forced by mmap function. 1838 + */ 1839 + if (PAGE_SIZE <= 4096) { 1840 + resp->comp_mask |= 1841 + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; 1842 + resp->hca_core_clock_offset = 1843 + offsetof(struct mlx5_init_seg, 1844 + internal_timer_h) % PAGE_SIZE; 1845 + } 1846 + 1847 + if (MLX5_CAP_GEN(dev->mdev, ece_support)) 1848 + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; 1849 + 1850 + resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; 1851 + return 0; 1852 + } 1853 + 1741 1854 static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, 1742 1855 struct ib_udata *udata) 1743 1856 { ··· 1831 1772 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1832 1773 struct mlx5_ib_alloc_ucontext_req_v2 req = {}; 1833 1774 struct mlx5_ib_alloc_ucontext_resp resp = {}; 1834 - struct mlx5_core_dev *mdev = dev->mdev; 1835 1775 struct mlx5_ib_ucontext *context = to_mucontext(uctx); 1836 1776 struct mlx5_bfreg_info *bfregi; 1837 1777 int ver; 1838 1778 int err; 1839 1779 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, 1840 1780 max_cqe_version); 1841 - u32 dump_fill_mkey; 1842 1781 bool lib_uar_4k; 1843 1782 bool lib_uar_dyn; 1844 1783 ··· 1864 1807 MLX5_NON_FP_BFREGS_PER_UAR); 1865 1808 if 
(req.num_low_latency_bfregs > req.total_num_bfregs - 1) 1866 1809 return -EINVAL; 1867 - 1868 - resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 1869 - if (dev->wc_support) 1870 - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); 1871 - resp.cache_line_size = cache_line_size(); 1872 - resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 1873 - resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 1874 - resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1875 - resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1876 - resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 1877 - resp.cqe_version = min_t(__u8, 1878 - (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), 1879 - req.max_cqe_version); 1880 - resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1881 - MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; 1882 - resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1883 - MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; 1884 - resp.response_length = min(offsetof(typeof(resp), response_length) + 1885 - sizeof(resp.response_length), udata->outlen); 1886 - 1887 - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { 1888 - if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) 1889 - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; 1890 - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) 1891 - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; 1892 - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) 1893 - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; 1894 - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) 1895 - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; 1896 - 
/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ 1897 - } 1898 1810 1899 1811 lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; 1900 1812 lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; ··· 1913 1887 if (err) 1914 1888 goto out_devx; 1915 1889 1916 - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1917 - err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); 1918 - if (err) 1919 - goto out_mdev; 1920 - } 1921 - 1922 1890 INIT_LIST_HEAD(&context->db_page_list); 1923 1891 mutex_init(&context->db_page_mutex); 1924 1892 1925 - resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; 1926 - resp.num_ports = dev->num_ports; 1893 + context->cqe_version = min_t(__u8, 1894 + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), 1895 + req.max_cqe_version); 1927 1896 1928 - if (offsetofend(typeof(resp), cqe_version) <= udata->outlen) 1929 - resp.response_length += sizeof(resp.cqe_version); 1897 + err = set_ucontext_resp(uctx, &resp); 1898 + if (err) 1899 + goto out_mdev; 1930 1900 1931 - if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) { 1932 - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | 1933 - MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; 1934 - resp.response_length += sizeof(resp.cmds_supp_uhw); 1935 - } 1936 - 1937 - if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) { 1938 - if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { 1939 - mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); 1940 - resp.eth_min_inline++; 1941 - } 1942 - resp.response_length += sizeof(resp.eth_min_inline); 1943 - } 1944 - 1945 - if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) { 1946 - if (mdev->clock_info) 1947 - resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); 1948 - resp.response_length += sizeof(resp.clock_info_versions); 1949 - } 1950 - 1951 - /* 1952 - * We don't want to expose information from the PCI bar that is located 1953 - * after 4096 bytes, so if the arch only 
supports larger pages, let's 1954 - * pretend we don't support reading the HCA's core clock. This is also 1955 - * forced by mmap function. 1956 - */ 1957 - if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { 1958 - if (PAGE_SIZE <= 4096) { 1959 - resp.comp_mask |= 1960 - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; 1961 - resp.hca_core_clock_offset = 1962 - offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; 1963 - } 1964 - resp.response_length += sizeof(resp.hca_core_clock_offset); 1965 - } 1966 - 1967 - if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) 1968 - resp.response_length += sizeof(resp.log_uar_size); 1969 - 1970 - if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) 1971 - resp.response_length += sizeof(resp.num_uars_per_page); 1972 - 1973 - if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { 1974 - resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; 1975 - resp.response_length += sizeof(resp.num_dyn_bfregs); 1976 - } 1977 - 1978 - if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) { 1979 - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1980 - resp.dump_fill_mkey = dump_fill_mkey; 1981 - resp.comp_mask |= 1982 - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; 1983 - } 1984 - resp.response_length += sizeof(resp.dump_fill_mkey); 1985 - } 1986 - 1987 - if (MLX5_CAP_GEN(dev->mdev, ece_support)) 1988 - resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; 1989 - 1901 + resp.response_length = min(udata->outlen, sizeof(resp)); 1990 1902 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1991 1903 if (err) 1992 1904 goto out_mdev; 1993 1905 1994 1906 bfregi->ver = ver; 1995 1907 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; 1996 - context->cqe_version = resp.cqe_version; 1997 1908 context->lib_caps = req.lib_caps; 1998 1909 print_lib_caps(dev, context->lib_caps); 1999 1910 ··· 1961 1998 1962 1999 out_ctx: 1963 2000 return err; 2001 + } 2002 + 2003 + 
static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, 2004 + struct uverbs_attr_bundle *attrs) 2005 + { 2006 + struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; 2007 + int ret; 2008 + 2009 + ret = set_ucontext_resp(ibcontext, &uctx_resp); 2010 + if (ret) 2011 + return ret; 2012 + 2013 + uctx_resp.response_length = 2014 + min_t(size_t, 2015 + uverbs_attr_get_len(attrs, 2016 + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), 2017 + sizeof(uctx_resp)); 2018 + 2019 + ret = uverbs_copy_to_struct_or_zero(attrs, 2020 + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, 2021 + &uctx_resp, 2022 + sizeof(uctx_resp)); 2023 + return ret; 1964 2024 } 1965 2025 1966 2026 static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) ··· 2577 2591 mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); 2578 2592 } 2579 2593 2580 - enum { 2581 - MATCH_CRITERIA_ENABLE_OUTER_BIT, 2582 - MATCH_CRITERIA_ENABLE_MISC_BIT, 2583 - MATCH_CRITERIA_ENABLE_INNER_BIT, 2584 - MATCH_CRITERIA_ENABLE_MISC2_BIT 2585 - }; 2586 - 2587 - #define HEADER_IS_ZERO(match_criteria, headers) \ 2588 - !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 2589 - 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ 2590 - 2591 - static u8 get_match_criteria_enable(u32 *match_criteria) 2592 - { 2593 - u8 match_criteria_enable; 2594 - 2595 - match_criteria_enable = 2596 - (!HEADER_IS_ZERO(match_criteria, outer_headers)) << 2597 - MATCH_CRITERIA_ENABLE_OUTER_BIT; 2598 - match_criteria_enable |= 2599 - (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << 2600 - MATCH_CRITERIA_ENABLE_MISC_BIT; 2601 - match_criteria_enable |= 2602 - (!HEADER_IS_ZERO(match_criteria, inner_headers)) << 2603 - MATCH_CRITERIA_ENABLE_INNER_BIT; 2604 - match_criteria_enable |= 2605 - (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << 2606 - MATCH_CRITERIA_ENABLE_MISC2_BIT; 2607 - 2608 - return match_criteria_enable; 2609 - } 2610 - 2611 - static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) 2612 - { 2613 - u8 
entry_mask; 2614 - u8 entry_val; 2615 - int err = 0; 2616 - 2617 - if (!mask) 2618 - goto out; 2619 - 2620 - entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, 2621 - ip_protocol); 2622 - entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, 2623 - ip_protocol); 2624 - if (!entry_mask) { 2625 - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); 2626 - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); 2627 - goto out; 2628 - } 2629 - /* Don't override existing ip protocol */ 2630 - if (mask != entry_mask || val != entry_val) 2631 - err = -EINVAL; 2632 - out: 2633 - return err; 2634 - } 2635 - 2636 - static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, 2637 - bool inner) 2638 - { 2639 - if (inner) { 2640 - MLX5_SET(fte_match_set_misc, 2641 - misc_c, inner_ipv6_flow_label, mask); 2642 - MLX5_SET(fte_match_set_misc, 2643 - misc_v, inner_ipv6_flow_label, val); 2644 - } else { 2645 - MLX5_SET(fte_match_set_misc, 2646 - misc_c, outer_ipv6_flow_label, mask); 2647 - MLX5_SET(fte_match_set_misc, 2648 - misc_v, outer_ipv6_flow_label, val); 2649 - } 2650 - } 2651 - 2652 - static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) 2653 - { 2654 - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); 2655 - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); 2656 - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); 2657 - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); 2658 - } 2659 - 2660 - static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) 2661 - { 2662 - if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && 2663 - !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) 2664 - return -EOPNOTSUPP; 2665 - 2666 - if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && 2667 - !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) 2668 - return -EOPNOTSUPP; 2669 - 2670 - if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && 2671 - !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) 2672 - return 
-EOPNOTSUPP; 2673 - 2674 - if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && 2675 - !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) 2676 - return -EOPNOTSUPP; 2677 - 2678 - return 0; 2679 - } 2680 - 2681 - #define LAST_ETH_FIELD vlan_tag 2682 - #define LAST_IB_FIELD sl 2683 - #define LAST_IPV4_FIELD tos 2684 - #define LAST_IPV6_FIELD traffic_class 2685 - #define LAST_TCP_UDP_FIELD src_port 2686 - #define LAST_TUNNEL_FIELD tunnel_id 2687 - #define LAST_FLOW_TAG_FIELD tag_id 2688 - #define LAST_DROP_FIELD size 2689 - #define LAST_COUNTERS_FIELD counters 2690 - 2691 - /* Field is the last supported field */ 2692 - #define FIELDS_NOT_SUPPORTED(filter, field)\ 2693 - memchr_inv((void *)&filter.field +\ 2694 - sizeof(filter.field), 0,\ 2695 - sizeof(filter) -\ 2696 - offsetof(typeof(filter), field) -\ 2697 - sizeof(filter.field)) 2698 - 2699 - int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, 2700 - bool is_egress, 2701 - struct mlx5_flow_act *action) 2702 - { 2703 - 2704 - switch (maction->ib_action.type) { 2705 - case IB_FLOW_ACTION_ESP: 2706 - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 2707 - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) 2708 - return -EINVAL; 2709 - /* Currently only AES_GCM keymat is supported by the driver */ 2710 - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; 2711 - action->action |= is_egress ? 
2712 - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : 2713 - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; 2714 - return 0; 2715 - case IB_FLOW_ACTION_UNSPECIFIED: 2716 - if (maction->flow_action_raw.sub_type == 2717 - MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { 2718 - if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 2719 - return -EINVAL; 2720 - action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2721 - action->modify_hdr = 2722 - maction->flow_action_raw.modify_hdr; 2723 - return 0; 2724 - } 2725 - if (maction->flow_action_raw.sub_type == 2726 - MLX5_IB_FLOW_ACTION_DECAP) { 2727 - if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) 2728 - return -EINVAL; 2729 - action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 2730 - return 0; 2731 - } 2732 - if (maction->flow_action_raw.sub_type == 2733 - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { 2734 - if (action->action & 2735 - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) 2736 - return -EINVAL; 2737 - action->action |= 2738 - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; 2739 - action->pkt_reformat = 2740 - maction->flow_action_raw.pkt_reformat; 2741 - return 0; 2742 - } 2743 - /* fall through */ 2744 - default: 2745 - return -EOPNOTSUPP; 2746 - } 2747 - } 2748 - 2749 - static int parse_flow_attr(struct mlx5_core_dev *mdev, 2750 - struct mlx5_flow_spec *spec, 2751 - const union ib_flow_spec *ib_spec, 2752 - const struct ib_flow_attr *flow_attr, 2753 - struct mlx5_flow_act *action, u32 prev_type) 2754 - { 2755 - struct mlx5_flow_context *flow_context = &spec->flow_context; 2756 - u32 *match_c = spec->match_criteria; 2757 - u32 *match_v = spec->match_value; 2758 - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, 2759 - misc_parameters); 2760 - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, 2761 - misc_parameters); 2762 - void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, 2763 - misc_parameters_2); 2764 - void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, 2765 - misc_parameters_2); 2766 - void *headers_c; 2767 - 
void *headers_v; 2768 - int match_ipv; 2769 - int ret; 2770 - 2771 - if (ib_spec->type & IB_FLOW_SPEC_INNER) { 2772 - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 2773 - inner_headers); 2774 - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 2775 - inner_headers); 2776 - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2777 - ft_field_support.inner_ip_version); 2778 - } else { 2779 - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 2780 - outer_headers); 2781 - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 2782 - outer_headers); 2783 - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2784 - ft_field_support.outer_ip_version); 2785 - } 2786 - 2787 - switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { 2788 - case IB_FLOW_SPEC_ETH: 2789 - if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) 2790 - return -EOPNOTSUPP; 2791 - 2792 - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2793 - dmac_47_16), 2794 - ib_spec->eth.mask.dst_mac); 2795 - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2796 - dmac_47_16), 2797 - ib_spec->eth.val.dst_mac); 2798 - 2799 - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2800 - smac_47_16), 2801 - ib_spec->eth.mask.src_mac); 2802 - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2803 - smac_47_16), 2804 - ib_spec->eth.val.src_mac); 2805 - 2806 - if (ib_spec->eth.mask.vlan_tag) { 2807 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2808 - cvlan_tag, 1); 2809 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2810 - cvlan_tag, 1); 2811 - 2812 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2813 - first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); 2814 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2815 - first_vid, ntohs(ib_spec->eth.val.vlan_tag)); 2816 - 2817 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2818 - first_cfi, 2819 - ntohs(ib_spec->eth.mask.vlan_tag) >> 12); 2820 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2821 - first_cfi, 2822 - ntohs(ib_spec->eth.val.vlan_tag) >> 12); 2823 - 
2824 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2825 - first_prio, 2826 - ntohs(ib_spec->eth.mask.vlan_tag) >> 13); 2827 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2828 - first_prio, 2829 - ntohs(ib_spec->eth.val.vlan_tag) >> 13); 2830 - } 2831 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2832 - ethertype, ntohs(ib_spec->eth.mask.ether_type)); 2833 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2834 - ethertype, ntohs(ib_spec->eth.val.ether_type)); 2835 - break; 2836 - case IB_FLOW_SPEC_IPV4: 2837 - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) 2838 - return -EOPNOTSUPP; 2839 - 2840 - if (match_ipv) { 2841 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2842 - ip_version, 0xf); 2843 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2844 - ip_version, MLX5_FS_IPV4_VERSION); 2845 - } else { 2846 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2847 - ethertype, 0xffff); 2848 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2849 - ethertype, ETH_P_IP); 2850 - } 2851 - 2852 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2853 - src_ipv4_src_ipv6.ipv4_layout.ipv4), 2854 - &ib_spec->ipv4.mask.src_ip, 2855 - sizeof(ib_spec->ipv4.mask.src_ip)); 2856 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2857 - src_ipv4_src_ipv6.ipv4_layout.ipv4), 2858 - &ib_spec->ipv4.val.src_ip, 2859 - sizeof(ib_spec->ipv4.val.src_ip)); 2860 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2861 - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2862 - &ib_spec->ipv4.mask.dst_ip, 2863 - sizeof(ib_spec->ipv4.mask.dst_ip)); 2864 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2865 - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2866 - &ib_spec->ipv4.val.dst_ip, 2867 - sizeof(ib_spec->ipv4.val.dst_ip)); 2868 - 2869 - set_tos(headers_c, headers_v, 2870 - ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); 2871 - 2872 - if (set_proto(headers_c, headers_v, 2873 - ib_spec->ipv4.mask.proto, 2874 - ib_spec->ipv4.val.proto)) 2875 - return -EINVAL; 2876 - break; 2877 - case IB_FLOW_SPEC_IPV6: 2878 
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) 2879 - return -EOPNOTSUPP; 2880 - 2881 - if (match_ipv) { 2882 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2883 - ip_version, 0xf); 2884 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2885 - ip_version, MLX5_FS_IPV6_VERSION); 2886 - } else { 2887 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2888 - ethertype, 0xffff); 2889 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2890 - ethertype, ETH_P_IPV6); 2891 - } 2892 - 2893 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2894 - src_ipv4_src_ipv6.ipv6_layout.ipv6), 2895 - &ib_spec->ipv6.mask.src_ip, 2896 - sizeof(ib_spec->ipv6.mask.src_ip)); 2897 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2898 - src_ipv4_src_ipv6.ipv6_layout.ipv6), 2899 - &ib_spec->ipv6.val.src_ip, 2900 - sizeof(ib_spec->ipv6.val.src_ip)); 2901 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2902 - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2903 - &ib_spec->ipv6.mask.dst_ip, 2904 - sizeof(ib_spec->ipv6.mask.dst_ip)); 2905 - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2906 - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2907 - &ib_spec->ipv6.val.dst_ip, 2908 - sizeof(ib_spec->ipv6.val.dst_ip)); 2909 - 2910 - set_tos(headers_c, headers_v, 2911 - ib_spec->ipv6.mask.traffic_class, 2912 - ib_spec->ipv6.val.traffic_class); 2913 - 2914 - if (set_proto(headers_c, headers_v, 2915 - ib_spec->ipv6.mask.next_hdr, 2916 - ib_spec->ipv6.val.next_hdr)) 2917 - return -EINVAL; 2918 - 2919 - set_flow_label(misc_params_c, misc_params_v, 2920 - ntohl(ib_spec->ipv6.mask.flow_label), 2921 - ntohl(ib_spec->ipv6.val.flow_label), 2922 - ib_spec->type & IB_FLOW_SPEC_INNER); 2923 - break; 2924 - case IB_FLOW_SPEC_ESP: 2925 - if (ib_spec->esp.mask.seq) 2926 - return -EOPNOTSUPP; 2927 - 2928 - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 2929 - ntohl(ib_spec->esp.mask.spi)); 2930 - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 2931 - ntohl(ib_spec->esp.val.spi)); 2932 - 
break; 2933 - case IB_FLOW_SPEC_TCP: 2934 - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 2935 - LAST_TCP_UDP_FIELD)) 2936 - return -EOPNOTSUPP; 2937 - 2938 - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) 2939 - return -EINVAL; 2940 - 2941 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, 2942 - ntohs(ib_spec->tcp_udp.mask.src_port)); 2943 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, 2944 - ntohs(ib_spec->tcp_udp.val.src_port)); 2945 - 2946 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, 2947 - ntohs(ib_spec->tcp_udp.mask.dst_port)); 2948 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, 2949 - ntohs(ib_spec->tcp_udp.val.dst_port)); 2950 - break; 2951 - case IB_FLOW_SPEC_UDP: 2952 - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 2953 - LAST_TCP_UDP_FIELD)) 2954 - return -EOPNOTSUPP; 2955 - 2956 - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) 2957 - return -EINVAL; 2958 - 2959 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, 2960 - ntohs(ib_spec->tcp_udp.mask.src_port)); 2961 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, 2962 - ntohs(ib_spec->tcp_udp.val.src_port)); 2963 - 2964 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, 2965 - ntohs(ib_spec->tcp_udp.mask.dst_port)); 2966 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, 2967 - ntohs(ib_spec->tcp_udp.val.dst_port)); 2968 - break; 2969 - case IB_FLOW_SPEC_GRE: 2970 - if (ib_spec->gre.mask.c_ks_res0_ver) 2971 - return -EOPNOTSUPP; 2972 - 2973 - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) 2974 - return -EINVAL; 2975 - 2976 - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2977 - 0xff); 2978 - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2979 - IPPROTO_GRE); 2980 - 2981 - MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, 2982 - ntohs(ib_spec->gre.mask.protocol)); 2983 - MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, 2984 - ntohs(ib_spec->gre.val.protocol)); 2985 - 2986 - 
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, 2987 - gre_key.nvgre.hi), 2988 - &ib_spec->gre.mask.key, 2989 - sizeof(ib_spec->gre.mask.key)); 2990 - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, 2991 - gre_key.nvgre.hi), 2992 - &ib_spec->gre.val.key, 2993 - sizeof(ib_spec->gre.val.key)); 2994 - break; 2995 - case IB_FLOW_SPEC_MPLS: 2996 - switch (prev_type) { 2997 - case IB_FLOW_SPEC_UDP: 2998 - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2999 - ft_field_support.outer_first_mpls_over_udp), 3000 - &ib_spec->mpls.mask.tag)) 3001 - return -EOPNOTSUPP; 3002 - 3003 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, 3004 - outer_first_mpls_over_udp), 3005 - &ib_spec->mpls.val.tag, 3006 - sizeof(ib_spec->mpls.val.tag)); 3007 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, 3008 - outer_first_mpls_over_udp), 3009 - &ib_spec->mpls.mask.tag, 3010 - sizeof(ib_spec->mpls.mask.tag)); 3011 - break; 3012 - case IB_FLOW_SPEC_GRE: 3013 - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 3014 - ft_field_support.outer_first_mpls_over_gre), 3015 - &ib_spec->mpls.mask.tag)) 3016 - return -EOPNOTSUPP; 3017 - 3018 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, 3019 - outer_first_mpls_over_gre), 3020 - &ib_spec->mpls.val.tag, 3021 - sizeof(ib_spec->mpls.val.tag)); 3022 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, 3023 - outer_first_mpls_over_gre), 3024 - &ib_spec->mpls.mask.tag, 3025 - sizeof(ib_spec->mpls.mask.tag)); 3026 - break; 3027 - default: 3028 - if (ib_spec->type & IB_FLOW_SPEC_INNER) { 3029 - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 3030 - ft_field_support.inner_first_mpls), 3031 - &ib_spec->mpls.mask.tag)) 3032 - return -EOPNOTSUPP; 3033 - 3034 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, 3035 - inner_first_mpls), 3036 - &ib_spec->mpls.val.tag, 3037 - sizeof(ib_spec->mpls.val.tag)); 3038 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, 3039 - 
inner_first_mpls), 3040 - &ib_spec->mpls.mask.tag, 3041 - sizeof(ib_spec->mpls.mask.tag)); 3042 - } else { 3043 - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 3044 - ft_field_support.outer_first_mpls), 3045 - &ib_spec->mpls.mask.tag)) 3046 - return -EOPNOTSUPP; 3047 - 3048 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, 3049 - outer_first_mpls), 3050 - &ib_spec->mpls.val.tag, 3051 - sizeof(ib_spec->mpls.val.tag)); 3052 - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, 3053 - outer_first_mpls), 3054 - &ib_spec->mpls.mask.tag, 3055 - sizeof(ib_spec->mpls.mask.tag)); 3056 - } 3057 - } 3058 - break; 3059 - case IB_FLOW_SPEC_VXLAN_TUNNEL: 3060 - if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, 3061 - LAST_TUNNEL_FIELD)) 3062 - return -EOPNOTSUPP; 3063 - 3064 - MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, 3065 - ntohl(ib_spec->tunnel.mask.tunnel_id)); 3066 - MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, 3067 - ntohl(ib_spec->tunnel.val.tunnel_id)); 3068 - break; 3069 - case IB_FLOW_SPEC_ACTION_TAG: 3070 - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, 3071 - LAST_FLOW_TAG_FIELD)) 3072 - return -EOPNOTSUPP; 3073 - if (ib_spec->flow_tag.tag_id >= BIT(24)) 3074 - return -EINVAL; 3075 - 3076 - flow_context->flow_tag = ib_spec->flow_tag.tag_id; 3077 - flow_context->flags |= FLOW_CONTEXT_HAS_TAG; 3078 - break; 3079 - case IB_FLOW_SPEC_ACTION_DROP: 3080 - if (FIELDS_NOT_SUPPORTED(ib_spec->drop, 3081 - LAST_DROP_FIELD)) 3082 - return -EOPNOTSUPP; 3083 - action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 3084 - break; 3085 - case IB_FLOW_SPEC_ACTION_HANDLE: 3086 - ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), 3087 - flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); 3088 - if (ret) 3089 - return ret; 3090 - break; 3091 - case IB_FLOW_SPEC_ACTION_COUNT: 3092 - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, 3093 - LAST_COUNTERS_FIELD)) 3094 - return -EOPNOTSUPP; 3095 - 3096 - /* for now support only one counters 
spec per flow */ 3097 - if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) 3098 - return -EINVAL; 3099 - 3100 - action->counters = ib_spec->flow_count.counters; 3101 - action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3102 - break; 3103 - default: 3104 - return -EINVAL; 3105 - } 3106 - 3107 - return 0; 3108 - } 3109 - 3110 - /* If a flow could catch both multicast and unicast packets, 3111 - * it won't fall into the multicast flow steering table and this rule 3112 - * could steal other multicast packets. 3113 - */ 3114 - static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) 3115 - { 3116 - union ib_flow_spec *flow_spec; 3117 - 3118 - if (ib_attr->type != IB_FLOW_ATTR_NORMAL || 3119 - ib_attr->num_of_specs < 1) 3120 - return false; 3121 - 3122 - flow_spec = (union ib_flow_spec *)(ib_attr + 1); 3123 - if (flow_spec->type == IB_FLOW_SPEC_IPV4) { 3124 - struct ib_flow_spec_ipv4 *ipv4_spec; 3125 - 3126 - ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; 3127 - if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) 3128 - return true; 3129 - 3130 - return false; 3131 - } 3132 - 3133 - if (flow_spec->type == IB_FLOW_SPEC_ETH) { 3134 - struct ib_flow_spec_eth *eth_spec; 3135 - 3136 - eth_spec = (struct ib_flow_spec_eth *)flow_spec; 3137 - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && 3138 - is_multicast_ether_addr(eth_spec->val.dst_mac); 3139 - } 3140 - 3141 - return false; 3142 - } 3143 - 3144 - enum valid_spec { 3145 - VALID_SPEC_INVALID, 3146 - VALID_SPEC_VALID, 3147 - VALID_SPEC_NA, 3148 - }; 3149 - 3150 - static enum valid_spec 3151 - is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, 3152 - const struct mlx5_flow_spec *spec, 3153 - const struct mlx5_flow_act *flow_act, 3154 - bool egress) 3155 - { 3156 - const u32 *match_c = spec->match_criteria; 3157 - bool is_crypto = 3158 - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 3159 - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); 3160 - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); 3161 - bool 
is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; 3162 - 3163 - /* 3164 - * Currently only crypto is supported in egress, when regular egress 3165 - * rules would be supported, always return VALID_SPEC_NA. 3166 - */ 3167 - if (!is_crypto) 3168 - return VALID_SPEC_NA; 3169 - 3170 - return is_crypto && is_ipsec && 3171 - (!egress || (!is_drop && 3172 - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? 3173 - VALID_SPEC_VALID : VALID_SPEC_INVALID; 3174 - } 3175 - 3176 - static bool is_valid_spec(struct mlx5_core_dev *mdev, 3177 - const struct mlx5_flow_spec *spec, 3178 - const struct mlx5_flow_act *flow_act, 3179 - bool egress) 3180 - { 3181 - /* We curretly only support ipsec egress flow */ 3182 - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; 3183 - } 3184 - 3185 - static bool is_valid_ethertype(struct mlx5_core_dev *mdev, 3186 - const struct ib_flow_attr *flow_attr, 3187 - bool check_inner) 3188 - { 3189 - union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); 3190 - int match_ipv = check_inner ? 3191 - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 3192 - ft_field_support.inner_ip_version) : 3193 - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 3194 - ft_field_support.outer_ip_version); 3195 - int inner_bit = check_inner ? 
IB_FLOW_SPEC_INNER : 0; 3196 - bool ipv4_spec_valid, ipv6_spec_valid; 3197 - unsigned int ip_spec_type = 0; 3198 - bool has_ethertype = false; 3199 - unsigned int spec_index; 3200 - bool mask_valid = true; 3201 - u16 eth_type = 0; 3202 - bool type_valid; 3203 - 3204 - /* Validate that ethertype is correct */ 3205 - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 3206 - if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && 3207 - ib_spec->eth.mask.ether_type) { 3208 - mask_valid = (ib_spec->eth.mask.ether_type == 3209 - htons(0xffff)); 3210 - has_ethertype = true; 3211 - eth_type = ntohs(ib_spec->eth.val.ether_type); 3212 - } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || 3213 - (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { 3214 - ip_spec_type = ib_spec->type; 3215 - } 3216 - ib_spec = (void *)ib_spec + ib_spec->size; 3217 - } 3218 - 3219 - type_valid = (!has_ethertype) || (!ip_spec_type); 3220 - if (!type_valid && mask_valid) { 3221 - ipv4_spec_valid = (eth_type == ETH_P_IP) && 3222 - (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); 3223 - ipv6_spec_valid = (eth_type == ETH_P_IPV6) && 3224 - (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); 3225 - 3226 - type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || 3227 - (((eth_type == ETH_P_MPLS_UC) || 3228 - (eth_type == ETH_P_MPLS_MC)) && match_ipv); 3229 - } 3230 - 3231 - return type_valid; 3232 - } 3233 - 3234 - static bool is_valid_attr(struct mlx5_core_dev *mdev, 3235 - const struct ib_flow_attr *flow_attr) 3236 - { 3237 - return is_valid_ethertype(mdev, flow_attr, false) && 3238 - is_valid_ethertype(mdev, flow_attr, true); 3239 - } 3240 - 3241 - static void put_flow_table(struct mlx5_ib_dev *dev, 3242 - struct mlx5_ib_flow_prio *prio, bool ft_added) 3243 - { 3244 - prio->refcount -= !!ft_added; 3245 - if (!prio->refcount) { 3246 - mlx5_destroy_flow_table(prio->flow_table); 3247 - prio->flow_table = NULL; 3248 - } 3249 - } 3250 - 3251 - static void 
counters_clear_description(struct ib_counters *counters) 3252 - { 3253 - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 3254 - 3255 - mutex_lock(&mcounters->mcntrs_mutex); 3256 - kfree(mcounters->counters_data); 3257 - mcounters->counters_data = NULL; 3258 - mcounters->cntrs_max_index = 0; 3259 - mutex_unlock(&mcounters->mcntrs_mutex); 3260 - } 3261 - 3262 - static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 3263 - { 3264 - struct mlx5_ib_flow_handler *handler = container_of(flow_id, 3265 - struct mlx5_ib_flow_handler, 3266 - ibflow); 3267 - struct mlx5_ib_flow_handler *iter, *tmp; 3268 - struct mlx5_ib_dev *dev = handler->dev; 3269 - 3270 - mutex_lock(&dev->flow_db->lock); 3271 - 3272 - list_for_each_entry_safe(iter, tmp, &handler->list, list) { 3273 - mlx5_del_flow_rules(iter->rule); 3274 - put_flow_table(dev, iter->prio, true); 3275 - list_del(&iter->list); 3276 - kfree(iter); 3277 - } 3278 - 3279 - mlx5_del_flow_rules(handler->rule); 3280 - put_flow_table(dev, handler->prio, true); 3281 - if (handler->ibcounters && 3282 - atomic_read(&handler->ibcounters->usecnt) == 1) 3283 - counters_clear_description(handler->ibcounters); 3284 - 3285 - mutex_unlock(&dev->flow_db->lock); 3286 - if (handler->flow_matcher) 3287 - atomic_dec(&handler->flow_matcher->usecnt); 3288 - kfree(handler); 3289 - 3290 - return 0; 3291 - } 3292 - 3293 - static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 3294 - { 3295 - priority *= 2; 3296 - if (!dont_trap) 3297 - priority++; 3298 - return priority; 3299 - } 3300 - 3301 - enum flow_table_type { 3302 - MLX5_IB_FT_RX, 3303 - MLX5_IB_FT_TX 3304 - }; 3305 - 3306 - #define MLX5_FS_MAX_TYPES 6 3307 - #define MLX5_FS_MAX_ENTRIES BIT(16) 3308 - 3309 - static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, 3310 - struct mlx5_ib_flow_prio *prio, 3311 - int priority, 3312 - int num_entries, int num_groups, 3313 - u32 flags) 3314 - { 3315 - struct mlx5_flow_table_attr ft_attr = {}; 3316 - 
struct mlx5_flow_table *ft; 3317 - 3318 - ft_attr.prio = priority; 3319 - ft_attr.max_fte = num_entries; 3320 - ft_attr.flags = flags; 3321 - ft_attr.autogroup.max_num_groups = num_groups; 3322 - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 3323 - if (IS_ERR(ft)) 3324 - return ERR_CAST(ft); 3325 - 3326 - prio->flow_table = ft; 3327 - prio->refcount = 0; 3328 - return prio; 3329 - } 3330 - 3331 - static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 3332 - struct ib_flow_attr *flow_attr, 3333 - enum flow_table_type ft_type) 3334 - { 3335 - bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 3336 - struct mlx5_flow_namespace *ns = NULL; 3337 - struct mlx5_ib_flow_prio *prio; 3338 - struct mlx5_flow_table *ft; 3339 - int max_table_size; 3340 - int num_entries; 3341 - int num_groups; 3342 - bool esw_encap; 3343 - u32 flags = 0; 3344 - int priority; 3345 - 3346 - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3347 - log_max_ft_size)); 3348 - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != 3349 - DEVLINK_ESWITCH_ENCAP_MODE_NONE; 3350 - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 3351 - enum mlx5_flow_namespace_type fn_type; 3352 - 3353 - if (flow_is_multicast_only(flow_attr) && 3354 - !dont_trap) 3355 - priority = MLX5_IB_FLOW_MCAST_PRIO; 3356 - else 3357 - priority = ib_prio_to_core_prio(flow_attr->priority, 3358 - dont_trap); 3359 - if (ft_type == MLX5_IB_FT_RX) { 3360 - fn_type = MLX5_FLOW_NAMESPACE_BYPASS; 3361 - prio = &dev->flow_db->prios[priority]; 3362 - if (!dev->is_rep && !esw_encap && 3363 - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) 3364 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; 3365 - if (!dev->is_rep && !esw_encap && 3366 - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3367 - reformat_l3_tunnel_to_l2)) 3368 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3369 - } else { 3370 - max_table_size = 3371 - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, 3372 - log_max_ft_size)); 3373 - fn_type = 
MLX5_FLOW_NAMESPACE_EGRESS; 3374 - prio = &dev->flow_db->egress_prios[priority]; 3375 - if (!dev->is_rep && !esw_encap && 3376 - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) 3377 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3378 - } 3379 - ns = mlx5_get_flow_namespace(dev->mdev, fn_type); 3380 - num_entries = MLX5_FS_MAX_ENTRIES; 3381 - num_groups = MLX5_FS_MAX_TYPES; 3382 - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3383 - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 3384 - ns = mlx5_get_flow_namespace(dev->mdev, 3385 - MLX5_FLOW_NAMESPACE_LEFTOVERS); 3386 - build_leftovers_ft_param(&priority, 3387 - &num_entries, 3388 - &num_groups); 3389 - prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; 3390 - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 3391 - if (!MLX5_CAP_FLOWTABLE(dev->mdev, 3392 - allow_sniffer_and_nic_rx_shared_tir)) 3393 - return ERR_PTR(-ENOTSUPP); 3394 - 3395 - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 3396 - MLX5_FLOW_NAMESPACE_SNIFFER_RX : 3397 - MLX5_FLOW_NAMESPACE_SNIFFER_TX); 3398 - 3399 - prio = &dev->flow_db->sniffer[ft_type]; 3400 - priority = 0; 3401 - num_entries = 1; 3402 - num_groups = 1; 3403 - } 3404 - 3405 - if (!ns) 3406 - return ERR_PTR(-ENOTSUPP); 3407 - 3408 - max_table_size = min_t(int, num_entries, max_table_size); 3409 - 3410 - ft = prio->flow_table; 3411 - if (!ft) 3412 - return _get_prio(ns, prio, priority, max_table_size, num_groups, 3413 - flags); 3414 - 3415 - return prio; 3416 - } 3417 - 3418 - static void set_underlay_qp(struct mlx5_ib_dev *dev, 3419 - struct mlx5_flow_spec *spec, 3420 - u32 underlay_qpn) 3421 - { 3422 - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, 3423 - spec->match_criteria, 3424 - misc_parameters); 3425 - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 3426 - misc_parameters); 3427 - 3428 - if (underlay_qpn && 3429 - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3430 - ft_field_support.bth_dst_qp)) { 3431 - 
MLX5_SET(fte_match_set_misc, 3432 - misc_params_v, bth_dst_qp, underlay_qpn); 3433 - MLX5_SET(fte_match_set_misc, 3434 - misc_params_c, bth_dst_qp, 0xffffff); 3435 - } 3436 - } 3437 - 3438 - static int read_flow_counters(struct ib_device *ibdev, 3439 - struct mlx5_read_counters_attr *read_attr) 3440 - { 3441 - struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; 3442 - struct mlx5_ib_dev *dev = to_mdev(ibdev); 3443 - 3444 - return mlx5_fc_query(dev->mdev, fc, 3445 - &read_attr->out[IB_COUNTER_PACKETS], 3446 - &read_attr->out[IB_COUNTER_BYTES]); 3447 - } 3448 - 3449 - /* flow counters currently expose two counters packets and bytes */ 3450 - #define FLOW_COUNTERS_NUM 2 3451 - static int counters_set_description(struct ib_counters *counters, 3452 - enum mlx5_ib_counters_type counters_type, 3453 - struct mlx5_ib_flow_counters_desc *desc_data, 3454 - u32 ncounters) 3455 - { 3456 - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 3457 - u32 cntrs_max_index = 0; 3458 - int i; 3459 - 3460 - if (counters_type != MLX5_IB_COUNTERS_FLOW) 3461 - return -EINVAL; 3462 - 3463 - /* init the fields for the object */ 3464 - mcounters->type = counters_type; 3465 - mcounters->read_counters = read_flow_counters; 3466 - mcounters->counters_num = FLOW_COUNTERS_NUM; 3467 - mcounters->ncounters = ncounters; 3468 - /* each counter entry have both description and index pair */ 3469 - for (i = 0; i < ncounters; i++) { 3470 - if (desc_data[i].description > IB_COUNTER_BYTES) 3471 - return -EINVAL; 3472 - 3473 - if (cntrs_max_index <= desc_data[i].index) 3474 - cntrs_max_index = desc_data[i].index + 1; 3475 - } 3476 - 3477 - mutex_lock(&mcounters->mcntrs_mutex); 3478 - mcounters->counters_data = desc_data; 3479 - mcounters->cntrs_max_index = cntrs_max_index; 3480 - mutex_unlock(&mcounters->mcntrs_mutex); 3481 - 3482 - return 0; 3483 - } 3484 - 3485 - #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) 3486 - static int flow_counters_set_data(struct ib_counters *ibcounters, 3487 - 
struct mlx5_ib_create_flow *ucmd) 3488 - { 3489 - struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); 3490 - struct mlx5_ib_flow_counters_data *cntrs_data = NULL; 3491 - struct mlx5_ib_flow_counters_desc *desc_data = NULL; 3492 - bool hw_hndl = false; 3493 - int ret = 0; 3494 - 3495 - if (ucmd && ucmd->ncounters_data != 0) { 3496 - cntrs_data = ucmd->data; 3497 - if (cntrs_data->ncounters > MAX_COUNTERS_NUM) 3498 - return -EINVAL; 3499 - 3500 - desc_data = kcalloc(cntrs_data->ncounters, 3501 - sizeof(*desc_data), 3502 - GFP_KERNEL); 3503 - if (!desc_data) 3504 - return -ENOMEM; 3505 - 3506 - if (copy_from_user(desc_data, 3507 - u64_to_user_ptr(cntrs_data->counters_data), 3508 - sizeof(*desc_data) * cntrs_data->ncounters)) { 3509 - ret = -EFAULT; 3510 - goto free; 3511 - } 3512 - } 3513 - 3514 - if (!mcounters->hw_cntrs_hndl) { 3515 - mcounters->hw_cntrs_hndl = mlx5_fc_create( 3516 - to_mdev(ibcounters->device)->mdev, false); 3517 - if (IS_ERR(mcounters->hw_cntrs_hndl)) { 3518 - ret = PTR_ERR(mcounters->hw_cntrs_hndl); 3519 - goto free; 3520 - } 3521 - hw_hndl = true; 3522 - } 3523 - 3524 - if (desc_data) { 3525 - /* counters already bound to at least one flow */ 3526 - if (mcounters->cntrs_max_index) { 3527 - ret = -EINVAL; 3528 - goto free_hndl; 3529 - } 3530 - 3531 - ret = counters_set_description(ibcounters, 3532 - MLX5_IB_COUNTERS_FLOW, 3533 - desc_data, 3534 - cntrs_data->ncounters); 3535 - if (ret) 3536 - goto free_hndl; 3537 - 3538 - } else if (!mcounters->cntrs_max_index) { 3539 - /* counters not bound yet, must have udata passed */ 3540 - ret = -EINVAL; 3541 - goto free_hndl; 3542 - } 3543 - 3544 - return 0; 3545 - 3546 - free_hndl: 3547 - if (hw_hndl) { 3548 - mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, 3549 - mcounters->hw_cntrs_hndl); 3550 - mcounters->hw_cntrs_hndl = NULL; 3551 - } 3552 - free: 3553 - kfree(desc_data); 3554 - return ret; 3555 - } 3556 - 3557 - static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, 3558 - 
struct mlx5_flow_spec *spec, 3559 - struct mlx5_eswitch_rep *rep) 3560 - { 3561 - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; 3562 - void *misc; 3563 - 3564 - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { 3565 - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, 3566 - misc_parameters_2); 3567 - 3568 - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, 3569 - mlx5_eswitch_get_vport_metadata_for_match(esw, 3570 - rep->vport)); 3571 - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 3572 - misc_parameters_2); 3573 - 3574 - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, 3575 - mlx5_eswitch_get_vport_metadata_mask()); 3576 - } else { 3577 - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, 3578 - misc_parameters); 3579 - 3580 - MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); 3581 - 3582 - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 3583 - misc_parameters); 3584 - 3585 - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); 3586 - } 3587 - } 3588 - 3589 - static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, 3590 - struct mlx5_ib_flow_prio *ft_prio, 3591 - const struct ib_flow_attr *flow_attr, 3592 - struct mlx5_flow_destination *dst, 3593 - u32 underlay_qpn, 3594 - struct mlx5_ib_create_flow *ucmd) 3595 - { 3596 - struct mlx5_flow_table *ft = ft_prio->flow_table; 3597 - struct mlx5_ib_flow_handler *handler; 3598 - struct mlx5_flow_act flow_act = {}; 3599 - struct mlx5_flow_spec *spec; 3600 - struct mlx5_flow_destination dest_arr[2] = {}; 3601 - struct mlx5_flow_destination *rule_dst = dest_arr; 3602 - const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); 3603 - unsigned int spec_index; 3604 - u32 prev_type = 0; 3605 - int err = 0; 3606 - int dest_num = 0; 3607 - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; 3608 - 3609 - if (!is_valid_attr(dev->mdev, flow_attr)) 3610 - return ERR_PTR(-EINVAL); 3611 - 3612 - if (dev->is_rep && is_egress) 
3613 - return ERR_PTR(-EINVAL); 3614 - 3615 - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 3616 - handler = kzalloc(sizeof(*handler), GFP_KERNEL); 3617 - if (!handler || !spec) { 3618 - err = -ENOMEM; 3619 - goto free; 3620 - } 3621 - 3622 - INIT_LIST_HEAD(&handler->list); 3623 - 3624 - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 3625 - err = parse_flow_attr(dev->mdev, spec, 3626 - ib_flow, flow_attr, &flow_act, 3627 - prev_type); 3628 - if (err < 0) 3629 - goto free; 3630 - 3631 - prev_type = ((union ib_flow_spec *)ib_flow)->type; 3632 - ib_flow += ((union ib_flow_spec *)ib_flow)->size; 3633 - } 3634 - 3635 - if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { 3636 - memcpy(&dest_arr[0], dst, sizeof(*dst)); 3637 - dest_num++; 3638 - } 3639 - 3640 - if (!flow_is_multicast_only(flow_attr)) 3641 - set_underlay_qp(dev, spec, underlay_qpn); 3642 - 3643 - if (dev->is_rep) { 3644 - struct mlx5_eswitch_rep *rep; 3645 - 3646 - rep = dev->port[flow_attr->port - 1].rep; 3647 - if (!rep) { 3648 - err = -EINVAL; 3649 - goto free; 3650 - } 3651 - 3652 - mlx5_ib_set_rule_source_port(dev, spec, rep); 3653 - } 3654 - 3655 - spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); 3656 - 3657 - if (is_egress && 3658 - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { 3659 - err = -EINVAL; 3660 - goto free; 3661 - } 3662 - 3663 - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 3664 - struct mlx5_ib_mcounters *mcounters; 3665 - 3666 - err = flow_counters_set_data(flow_act.counters, ucmd); 3667 - if (err) 3668 - goto free; 3669 - 3670 - mcounters = to_mcounters(flow_act.counters); 3671 - handler->ibcounters = flow_act.counters; 3672 - dest_arr[dest_num].type = 3673 - MLX5_FLOW_DESTINATION_TYPE_COUNTER; 3674 - dest_arr[dest_num].counter_id = 3675 - mlx5_fc_id(mcounters->hw_cntrs_hndl); 3676 - dest_num++; 3677 - } 3678 - 3679 - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { 3680 - if (!dest_num) 3681 
- rule_dst = NULL; 3682 - } else { 3683 - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) 3684 - flow_act.action |= 3685 - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 3686 - if (is_egress) 3687 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; 3688 - else if (dest_num) 3689 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3690 - } 3691 - 3692 - if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && 3693 - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3694 - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { 3695 - mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", 3696 - spec->flow_context.flow_tag, flow_attr->type); 3697 - err = -EINVAL; 3698 - goto free; 3699 - } 3700 - handler->rule = mlx5_add_flow_rules(ft, spec, 3701 - &flow_act, 3702 - rule_dst, dest_num); 3703 - 3704 - if (IS_ERR(handler->rule)) { 3705 - err = PTR_ERR(handler->rule); 3706 - goto free; 3707 - } 3708 - 3709 - ft_prio->refcount++; 3710 - handler->prio = ft_prio; 3711 - handler->dev = dev; 3712 - 3713 - ft_prio->flow_table = ft; 3714 - free: 3715 - if (err && handler) { 3716 - if (handler->ibcounters && 3717 - atomic_read(&handler->ibcounters->usecnt) == 1) 3718 - counters_clear_description(handler->ibcounters); 3719 - kfree(handler); 3720 - } 3721 - kvfree(spec); 3722 - return err ? 
ERR_PTR(err) : handler; 3723 - } 3724 - 3725 - static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, 3726 - struct mlx5_ib_flow_prio *ft_prio, 3727 - const struct ib_flow_attr *flow_attr, 3728 - struct mlx5_flow_destination *dst) 3729 - { 3730 - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); 3731 - } 3732 - 3733 - enum { 3734 - LEFTOVERS_MC, 3735 - LEFTOVERS_UC, 3736 - }; 3737 - 3738 - static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, 3739 - struct mlx5_ib_flow_prio *ft_prio, 3740 - struct ib_flow_attr *flow_attr, 3741 - struct mlx5_flow_destination *dst) 3742 - { 3743 - struct mlx5_ib_flow_handler *handler_ucast = NULL; 3744 - struct mlx5_ib_flow_handler *handler = NULL; 3745 - 3746 - static struct { 3747 - struct ib_flow_attr flow_attr; 3748 - struct ib_flow_spec_eth eth_flow; 3749 - } leftovers_specs[] = { 3750 - [LEFTOVERS_MC] = { 3751 - .flow_attr = { 3752 - .num_of_specs = 1, 3753 - .size = sizeof(leftovers_specs[0]) 3754 - }, 3755 - .eth_flow = { 3756 - .type = IB_FLOW_SPEC_ETH, 3757 - .size = sizeof(struct ib_flow_spec_eth), 3758 - .mask = {.dst_mac = {0x1} }, 3759 - .val = {.dst_mac = {0x1} } 3760 - } 3761 - }, 3762 - [LEFTOVERS_UC] = { 3763 - .flow_attr = { 3764 - .num_of_specs = 1, 3765 - .size = sizeof(leftovers_specs[0]) 3766 - }, 3767 - .eth_flow = { 3768 - .type = IB_FLOW_SPEC_ETH, 3769 - .size = sizeof(struct ib_flow_spec_eth), 3770 - .mask = {.dst_mac = {0x1} }, 3771 - .val = {.dst_mac = {} } 3772 - } 3773 - } 3774 - }; 3775 - 3776 - handler = create_flow_rule(dev, ft_prio, 3777 - &leftovers_specs[LEFTOVERS_MC].flow_attr, 3778 - dst); 3779 - if (!IS_ERR(handler) && 3780 - flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { 3781 - handler_ucast = create_flow_rule(dev, ft_prio, 3782 - &leftovers_specs[LEFTOVERS_UC].flow_attr, 3783 - dst); 3784 - if (IS_ERR(handler_ucast)) { 3785 - mlx5_del_flow_rules(handler->rule); 3786 - ft_prio->refcount--; 3787 - kfree(handler); 3788 - handler 
= handler_ucast; 3789 - } else { 3790 - list_add(&handler_ucast->list, &handler->list); 3791 - } 3792 - } 3793 - 3794 - return handler; 3795 - } 3796 - 3797 - static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, 3798 - struct mlx5_ib_flow_prio *ft_rx, 3799 - struct mlx5_ib_flow_prio *ft_tx, 3800 - struct mlx5_flow_destination *dst) 3801 - { 3802 - struct mlx5_ib_flow_handler *handler_rx; 3803 - struct mlx5_ib_flow_handler *handler_tx; 3804 - int err; 3805 - static const struct ib_flow_attr flow_attr = { 3806 - .num_of_specs = 0, 3807 - .size = sizeof(flow_attr) 3808 - }; 3809 - 3810 - handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); 3811 - if (IS_ERR(handler_rx)) { 3812 - err = PTR_ERR(handler_rx); 3813 - goto err; 3814 - } 3815 - 3816 - handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); 3817 - if (IS_ERR(handler_tx)) { 3818 - err = PTR_ERR(handler_tx); 3819 - goto err_tx; 3820 - } 3821 - 3822 - list_add(&handler_tx->list, &handler_rx->list); 3823 - 3824 - return handler_rx; 3825 - 3826 - err_tx: 3827 - mlx5_del_flow_rules(handler_rx->rule); 3828 - ft_rx->refcount--; 3829 - kfree(handler_rx); 3830 - err: 3831 - return ERR_PTR(err); 3832 - } 3833 - 3834 - static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, 3835 - struct ib_flow_attr *flow_attr, 3836 - int domain, 3837 - struct ib_udata *udata) 3838 - { 3839 - struct mlx5_ib_dev *dev = to_mdev(qp->device); 3840 - struct mlx5_ib_qp *mqp = to_mqp(qp); 3841 - struct mlx5_ib_flow_handler *handler = NULL; 3842 - struct mlx5_flow_destination *dst = NULL; 3843 - struct mlx5_ib_flow_prio *ft_prio_tx = NULL; 3844 - struct mlx5_ib_flow_prio *ft_prio; 3845 - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; 3846 - struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; 3847 - size_t min_ucmd_sz, required_ucmd_sz; 3848 - int err; 3849 - int underlay_qpn; 3850 - 3851 - if (udata && udata->inlen) { 3852 - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + 3853 - 
sizeof(ucmd_hdr.reserved); 3854 - if (udata->inlen < min_ucmd_sz) 3855 - return ERR_PTR(-EOPNOTSUPP); 3856 - 3857 - err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); 3858 - if (err) 3859 - return ERR_PTR(err); 3860 - 3861 - /* currently supports only one counters data */ 3862 - if (ucmd_hdr.ncounters_data > 1) 3863 - return ERR_PTR(-EINVAL); 3864 - 3865 - required_ucmd_sz = min_ucmd_sz + 3866 - sizeof(struct mlx5_ib_flow_counters_data) * 3867 - ucmd_hdr.ncounters_data; 3868 - if (udata->inlen > required_ucmd_sz && 3869 - !ib_is_udata_cleared(udata, required_ucmd_sz, 3870 - udata->inlen - required_ucmd_sz)) 3871 - return ERR_PTR(-EOPNOTSUPP); 3872 - 3873 - ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); 3874 - if (!ucmd) 3875 - return ERR_PTR(-ENOMEM); 3876 - 3877 - err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); 3878 - if (err) 3879 - goto free_ucmd; 3880 - } 3881 - 3882 - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { 3883 - err = -ENOMEM; 3884 - goto free_ucmd; 3885 - } 3886 - 3887 - if (domain != IB_FLOW_DOMAIN_USER || 3888 - flow_attr->port > dev->num_ports || 3889 - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | 3890 - IB_FLOW_ATTR_FLAGS_EGRESS))) { 3891 - err = -EINVAL; 3892 - goto free_ucmd; 3893 - } 3894 - 3895 - if (is_egress && 3896 - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3897 - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { 3898 - err = -EINVAL; 3899 - goto free_ucmd; 3900 - } 3901 - 3902 - dst = kzalloc(sizeof(*dst), GFP_KERNEL); 3903 - if (!dst) { 3904 - err = -ENOMEM; 3905 - goto free_ucmd; 3906 - } 3907 - 3908 - mutex_lock(&dev->flow_db->lock); 3909 - 3910 - ft_prio = get_flow_table(dev, flow_attr, 3911 - is_egress ? 
MLX5_IB_FT_TX : MLX5_IB_FT_RX); 3912 - if (IS_ERR(ft_prio)) { 3913 - err = PTR_ERR(ft_prio); 3914 - goto unlock; 3915 - } 3916 - if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 3917 - ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); 3918 - if (IS_ERR(ft_prio_tx)) { 3919 - err = PTR_ERR(ft_prio_tx); 3920 - ft_prio_tx = NULL; 3921 - goto destroy_ft; 3922 - } 3923 - } 3924 - 3925 - if (is_egress) { 3926 - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; 3927 - } else { 3928 - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; 3929 - if (mqp->is_rss) 3930 - dst->tir_num = mqp->rss_qp.tirn; 3931 - else 3932 - dst->tir_num = mqp->raw_packet_qp.rq.tirn; 3933 - } 3934 - 3935 - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 3936 - underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? 3937 - mqp->underlay_qpn : 3938 - 0; 3939 - handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, 3940 - underlay_qpn, ucmd); 3941 - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3942 - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 3943 - handler = create_leftovers_rule(dev, ft_prio, flow_attr, 3944 - dst); 3945 - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 3946 - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); 3947 - } else { 3948 - err = -EINVAL; 3949 - goto destroy_ft; 3950 - } 3951 - 3952 - if (IS_ERR(handler)) { 3953 - err = PTR_ERR(handler); 3954 - handler = NULL; 3955 - goto destroy_ft; 3956 - } 3957 - 3958 - mutex_unlock(&dev->flow_db->lock); 3959 - kfree(dst); 3960 - kfree(ucmd); 3961 - 3962 - return &handler->ibflow; 3963 - 3964 - destroy_ft: 3965 - put_flow_table(dev, ft_prio, false); 3966 - if (ft_prio_tx) 3967 - put_flow_table(dev, ft_prio_tx, false); 3968 - unlock: 3969 - mutex_unlock(&dev->flow_db->lock); 3970 - kfree(dst); 3971 - free_ucmd: 3972 - kfree(ucmd); 3973 - return ERR_PTR(err); 3974 - } 3975 - 3976 - static struct mlx5_ib_flow_prio * 3977 - _get_flow_table(struct mlx5_ib_dev *dev, 3978 - struct mlx5_ib_flow_matcher *fs_matcher, 
3979 - bool mcast) 3980 - { 3981 - struct mlx5_flow_namespace *ns = NULL; 3982 - struct mlx5_ib_flow_prio *prio = NULL; 3983 - int max_table_size = 0; 3984 - bool esw_encap; 3985 - u32 flags = 0; 3986 - int priority; 3987 - 3988 - if (mcast) 3989 - priority = MLX5_IB_FLOW_MCAST_PRIO; 3990 - else 3991 - priority = ib_prio_to_core_prio(fs_matcher->priority, false); 3992 - 3993 - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != 3994 - DEVLINK_ESWITCH_ENCAP_MODE_NONE; 3995 - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { 3996 - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3997 - log_max_ft_size)); 3998 - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) 3999 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; 4000 - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 4001 - reformat_l3_tunnel_to_l2) && 4002 - !esw_encap) 4003 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 4004 - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { 4005 - max_table_size = BIT( 4006 - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); 4007 - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) 4008 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 4009 - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { 4010 - max_table_size = BIT( 4011 - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); 4012 - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) 4013 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; 4014 - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && 4015 - esw_encap) 4016 - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 4017 - priority = FDB_BYPASS_PATH; 4018 - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { 4019 - max_table_size = 4020 - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, 4021 - log_max_ft_size)); 4022 - priority = fs_matcher->priority; 4023 - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { 4024 - max_table_size = 4025 - 
BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, 4026 - log_max_ft_size)); 4027 - priority = fs_matcher->priority; 4028 - } 4029 - 4030 - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); 4031 - 4032 - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); 4033 - if (!ns) 4034 - return ERR_PTR(-ENOTSUPP); 4035 - 4036 - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) 4037 - prio = &dev->flow_db->prios[priority]; 4038 - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) 4039 - prio = &dev->flow_db->egress_prios[priority]; 4040 - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) 4041 - prio = &dev->flow_db->fdb; 4042 - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) 4043 - prio = &dev->flow_db->rdma_rx[priority]; 4044 - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) 4045 - prio = &dev->flow_db->rdma_tx[priority]; 4046 - 4047 - if (!prio) 4048 - return ERR_PTR(-EINVAL); 4049 - 4050 - if (prio->flow_table) 4051 - return prio; 4052 - 4053 - return _get_prio(ns, prio, priority, max_table_size, 4054 - MLX5_FS_MAX_TYPES, flags); 4055 - } 4056 - 4057 - static struct mlx5_ib_flow_handler * 4058 - _create_raw_flow_rule(struct mlx5_ib_dev *dev, 4059 - struct mlx5_ib_flow_prio *ft_prio, 4060 - struct mlx5_flow_destination *dst, 4061 - struct mlx5_ib_flow_matcher *fs_matcher, 4062 - struct mlx5_flow_context *flow_context, 4063 - struct mlx5_flow_act *flow_act, 4064 - void *cmd_in, int inlen, 4065 - int dst_num) 4066 - { 4067 - struct mlx5_ib_flow_handler *handler; 4068 - struct mlx5_flow_spec *spec; 4069 - struct mlx5_flow_table *ft = ft_prio->flow_table; 4070 - int err = 0; 4071 - 4072 - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 4073 - handler = kzalloc(sizeof(*handler), GFP_KERNEL); 4074 - if (!handler || !spec) { 4075 - err = -ENOMEM; 4076 - goto free; 4077 - } 4078 - 4079 - INIT_LIST_HEAD(&handler->list); 4080 - 4081 - memcpy(spec->match_value, cmd_in, inlen); 4082 - memcpy(spec->match_criteria, 
fs_matcher->matcher_mask.match_params, 4083 - fs_matcher->mask_len); 4084 - spec->match_criteria_enable = fs_matcher->match_criteria_enable; 4085 - spec->flow_context = *flow_context; 4086 - 4087 - handler->rule = mlx5_add_flow_rules(ft, spec, 4088 - flow_act, dst, dst_num); 4089 - 4090 - if (IS_ERR(handler->rule)) { 4091 - err = PTR_ERR(handler->rule); 4092 - goto free; 4093 - } 4094 - 4095 - ft_prio->refcount++; 4096 - handler->prio = ft_prio; 4097 - handler->dev = dev; 4098 - ft_prio->flow_table = ft; 4099 - 4100 - free: 4101 - if (err) 4102 - kfree(handler); 4103 - kvfree(spec); 4104 - return err ? ERR_PTR(err) : handler; 4105 - } 4106 - 4107 - static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, 4108 - void *match_v) 4109 - { 4110 - void *match_c; 4111 - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; 4112 - void *dmac, *dmac_mask; 4113 - void *ipv4, *ipv4_mask; 4114 - 4115 - if (!(fs_matcher->match_criteria_enable & 4116 - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) 4117 - return false; 4118 - 4119 - match_c = fs_matcher->matcher_mask.match_params; 4120 - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, 4121 - outer_headers); 4122 - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, 4123 - outer_headers); 4124 - 4125 - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, 4126 - dmac_47_16); 4127 - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, 4128 - dmac_47_16); 4129 - 4130 - if (is_multicast_ether_addr(dmac) && 4131 - is_multicast_ether_addr(dmac_mask)) 4132 - return true; 4133 - 4134 - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, 4135 - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); 4136 - 4137 - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, 4138 - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); 4139 - 4140 - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && 4141 - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) 4142 - return true; 4143 - 4144 - return false; 4145 - } 4146 - 
4147 - struct mlx5_ib_flow_handler * 4148 - mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, 4149 - struct mlx5_ib_flow_matcher *fs_matcher, 4150 - struct mlx5_flow_context *flow_context, 4151 - struct mlx5_flow_act *flow_act, 4152 - u32 counter_id, 4153 - void *cmd_in, int inlen, int dest_id, 4154 - int dest_type) 4155 - { 4156 - struct mlx5_flow_destination *dst; 4157 - struct mlx5_ib_flow_prio *ft_prio; 4158 - struct mlx5_ib_flow_handler *handler; 4159 - int dst_num = 0; 4160 - bool mcast; 4161 - int err; 4162 - 4163 - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) 4164 - return ERR_PTR(-EOPNOTSUPP); 4165 - 4166 - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) 4167 - return ERR_PTR(-ENOMEM); 4168 - 4169 - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); 4170 - if (!dst) 4171 - return ERR_PTR(-ENOMEM); 4172 - 4173 - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); 4174 - mutex_lock(&dev->flow_db->lock); 4175 - 4176 - ft_prio = _get_flow_table(dev, fs_matcher, mcast); 4177 - if (IS_ERR(ft_prio)) { 4178 - err = PTR_ERR(ft_prio); 4179 - goto unlock; 4180 - } 4181 - 4182 - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { 4183 - dst[dst_num].type = dest_type; 4184 - dst[dst_num++].tir_num = dest_id; 4185 - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 4186 - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { 4187 - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; 4188 - dst[dst_num++].ft_num = dest_id; 4189 - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 4190 - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { 4191 - dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; 4192 - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; 4193 - } 4194 - 4195 - 4196 - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 4197 - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 4198 - dst[dst_num].counter_id = counter_id; 4199 - dst_num++; 4200 - } 4201 - 4202 - handler = _create_raw_flow_rule(dev, ft_prio, 
dst, fs_matcher, 4203 - flow_context, flow_act, 4204 - cmd_in, inlen, dst_num); 4205 - 4206 - if (IS_ERR(handler)) { 4207 - err = PTR_ERR(handler); 4208 - goto destroy_ft; 4209 - } 4210 - 4211 - mutex_unlock(&dev->flow_db->lock); 4212 - atomic_inc(&fs_matcher->usecnt); 4213 - handler->flow_matcher = fs_matcher; 4214 - 4215 - kfree(dst); 4216 - 4217 - return handler; 4218 - 4219 - destroy_ft: 4220 - put_flow_table(dev, ft_prio, false); 4221 - unlock: 4222 - mutex_unlock(&dev->flow_db->lock); 4223 - kfree(dst); 4224 - 4225 - return ERR_PTR(err); 4226 - } 4227 - 4228 - static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) 4229 - { 4230 - u32 flags = 0; 4231 - 4232 - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) 4233 - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; 4234 - 4235 - return flags; 4236 - } 4237 - 4238 - #define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA 4239 - static struct ib_flow_action * 4240 - mlx5_ib_create_flow_action_esp(struct ib_device *device, 4241 - const struct ib_flow_action_attrs_esp *attr, 4242 - struct uverbs_attr_bundle *attrs) 4243 - { 4244 - struct mlx5_ib_dev *mdev = to_mdev(device); 4245 - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; 4246 - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; 4247 - struct mlx5_ib_flow_action *action; 4248 - u64 action_flags; 4249 - u64 flags; 4250 - int err = 0; 4251 - 4252 - err = uverbs_get_flags64( 4253 - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, 4254 - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); 4255 - if (err) 4256 - return ERR_PTR(err); 4257 - 4258 - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); 4259 - 4260 - /* We current only support a subset of the standard features. Only a 4261 - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn 4262 - * (with overlap). Full offload mode isn't supported. 
4263 - */ 4264 - if (!attr->keymat || attr->replay || attr->encap || 4265 - attr->spi || attr->seq || attr->tfc_pad || 4266 - attr->hard_limit_pkts || 4267 - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | 4268 - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) 4269 - return ERR_PTR(-EOPNOTSUPP); 4270 - 4271 - if (attr->keymat->protocol != 4272 - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) 4273 - return ERR_PTR(-EOPNOTSUPP); 4274 - 4275 - aes_gcm = &attr->keymat->keymat.aes_gcm; 4276 - 4277 - if (aes_gcm->icv_len != 16 || 4278 - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) 4279 - return ERR_PTR(-EOPNOTSUPP); 4280 - 4281 - action = kmalloc(sizeof(*action), GFP_KERNEL); 4282 - if (!action) 4283 - return ERR_PTR(-ENOMEM); 4284 - 4285 - action->esp_aes_gcm.ib_flags = attr->flags; 4286 - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, 4287 - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); 4288 - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; 4289 - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, 4290 - sizeof(accel_attrs.keymat.aes_gcm.salt)); 4291 - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, 4292 - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); 4293 - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; 4294 - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; 4295 - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; 4296 - 4297 - accel_attrs.esn = attr->esn; 4298 - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) 4299 - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; 4300 - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) 4301 - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; 4302 - 4303 - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) 4304 - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; 4305 - 4306 - action->esp_aes_gcm.ctx = 4307 - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); 4308 - if 
(IS_ERR(action->esp_aes_gcm.ctx)) { 4309 - err = PTR_ERR(action->esp_aes_gcm.ctx); 4310 - goto err_parse; 4311 - } 4312 - 4313 - action->esp_aes_gcm.ib_flags = attr->flags; 4314 - 4315 - return &action->ib_action; 4316 - 4317 - err_parse: 4318 - kfree(action); 4319 - return ERR_PTR(err); 4320 - } 4321 - 4322 - static int 4323 - mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, 4324 - const struct ib_flow_action_attrs_esp *attr, 4325 - struct uverbs_attr_bundle *attrs) 4326 - { 4327 - struct mlx5_ib_flow_action *maction = to_mflow_act(action); 4328 - struct mlx5_accel_esp_xfrm_attrs accel_attrs; 4329 - int err = 0; 4330 - 4331 - if (attr->keymat || attr->replay || attr->encap || 4332 - attr->spi || attr->seq || attr->tfc_pad || 4333 - attr->hard_limit_pkts || 4334 - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | 4335 - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | 4336 - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) 4337 - return -EOPNOTSUPP; 4338 - 4339 - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can 4340 - * be modified. 
4341 - */ 4342 - if (!(maction->esp_aes_gcm.ib_flags & 4343 - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && 4344 - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | 4345 - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) 4346 - return -EINVAL; 4347 - 4348 - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, 4349 - sizeof(accel_attrs)); 4350 - 4351 - accel_attrs.esn = attr->esn; 4352 - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) 4353 - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; 4354 - else 4355 - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; 4356 - 4357 - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, 4358 - &accel_attrs); 4359 - if (err) 4360 - return err; 4361 - 4362 - maction->esp_aes_gcm.ib_flags &= 4363 - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; 4364 - maction->esp_aes_gcm.ib_flags |= 4365 - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; 4366 - 4367 - return 0; 4368 - } 4369 - 4370 - static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) 4371 - { 4372 - struct mlx5_ib_flow_action *maction = to_mflow_act(action); 4373 - 4374 - switch (action->type) { 4375 - case IB_FLOW_ACTION_ESP: 4376 - /* 4377 - * We only support aes_gcm by now, so we implicitly know this is 4378 - * the underline crypto. 
4379 - */ 4380 - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); 4381 - break; 4382 - case IB_FLOW_ACTION_UNSPECIFIED: 4383 - mlx5_ib_destroy_flow_action_raw(maction); 4384 - break; 4385 - default: 4386 - WARN_ON(true); 4387 - break; 4388 - } 4389 - 4390 - kfree(maction); 4391 - return 0; 4392 - } 4393 - 4394 2594 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 4395 2595 { 4396 2596 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); ··· 3020 4848 return __get_port_caps(dev, port); 3021 4849 } 3022 4850 3023 - static void destroy_umrc_res(struct mlx5_ib_dev *dev) 3024 - { 3025 - int err; 3026 - 3027 - err = mlx5_mr_cache_cleanup(dev); 3028 - if (err) 3029 - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 3030 - 3031 - if (dev->umrc.qp) 3032 - mlx5_ib_destroy_qp(dev->umrc.qp, NULL); 3033 - if (dev->umrc.cq) 3034 - ib_free_cq(dev->umrc.cq); 3035 - if (dev->umrc.pd) 3036 - ib_dealloc_pd(dev->umrc.pd); 3037 - } 3038 - 3039 - enum { 3040 - MAX_UMR_WR = 128, 3041 - }; 3042 - 3043 - static int create_umr_res(struct mlx5_ib_dev *dev) 3044 - { 3045 - struct ib_qp_init_attr *init_attr = NULL; 3046 - struct ib_qp_attr *attr = NULL; 3047 - struct ib_pd *pd; 3048 - struct ib_cq *cq; 3049 - struct ib_qp *qp; 3050 - int ret; 3051 - 3052 - attr = kzalloc(sizeof(*attr), GFP_KERNEL); 3053 - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 3054 - if (!attr || !init_attr) { 3055 - ret = -ENOMEM; 3056 - goto error_0; 3057 - } 3058 - 3059 - pd = ib_alloc_pd(&dev->ib_dev, 0); 3060 - if (IS_ERR(pd)) { 3061 - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 3062 - ret = PTR_ERR(pd); 3063 - goto error_0; 3064 - } 3065 - 3066 - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 3067 - if (IS_ERR(cq)) { 3068 - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 3069 - ret = PTR_ERR(cq); 3070 - goto error_2; 3071 - } 3072 - 3073 - init_attr->send_cq = cq; 3074 - init_attr->recv_cq = cq; 3075 - init_attr->sq_sig_type = 
IB_SIGNAL_ALL_WR; 3076 - init_attr->cap.max_send_wr = MAX_UMR_WR; 3077 - init_attr->cap.max_send_sge = 1; 3078 - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 3079 - init_attr->port_num = 1; 3080 - qp = mlx5_ib_create_qp(pd, init_attr, NULL); 3081 - if (IS_ERR(qp)) { 3082 - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 3083 - ret = PTR_ERR(qp); 3084 - goto error_3; 3085 - } 3086 - qp->device = &dev->ib_dev; 3087 - qp->real_qp = qp; 3088 - qp->uobject = NULL; 3089 - qp->qp_type = MLX5_IB_QPT_REG_UMR; 3090 - qp->send_cq = init_attr->send_cq; 3091 - qp->recv_cq = init_attr->recv_cq; 3092 - 3093 - attr->qp_state = IB_QPS_INIT; 3094 - attr->port_num = 1; 3095 - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 3096 - IB_QP_PORT, NULL); 3097 - if (ret) { 3098 - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 3099 - goto error_4; 3100 - } 3101 - 3102 - memset(attr, 0, sizeof(*attr)); 3103 - attr->qp_state = IB_QPS_RTR; 3104 - attr->path_mtu = IB_MTU_256; 3105 - 3106 - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 3107 - if (ret) { 3108 - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 3109 - goto error_4; 3110 - } 3111 - 3112 - memset(attr, 0, sizeof(*attr)); 3113 - attr->qp_state = IB_QPS_RTS; 3114 - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 3115 - if (ret) { 3116 - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 3117 - goto error_4; 3118 - } 3119 - 3120 - dev->umrc.qp = qp; 3121 - dev->umrc.cq = cq; 3122 - dev->umrc.pd = pd; 3123 - 3124 - sema_init(&dev->umrc.sem, MAX_UMR_WR); 3125 - ret = mlx5_mr_cache_init(dev); 3126 - if (ret) { 3127 - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 3128 - goto error_4; 3129 - } 3130 - 3131 - kfree(attr); 3132 - kfree(init_attr); 3133 - 3134 - return 0; 3135 - 3136 - error_4: 3137 - mlx5_ib_destroy_qp(qp, NULL); 3138 - dev->umrc.qp = NULL; 3139 - 3140 - error_3: 3141 - ib_free_cq(cq); 3142 - dev->umrc.cq = NULL; 3143 - 3144 - error_2: 3145 - ib_dealloc_pd(pd); 3146 - dev->umrc.pd = 
NULL; 3147 - 3148 - error_0: 3149 - kfree(attr); 3150 - kfree(init_attr); 3151 - return ret; 3152 - } 3153 - 3154 4851 static u8 mlx5_get_umr_fence(u8 umr_fence_cap) 3155 4852 { 3156 4853 switch (umr_fence_cap) { ··· 3032 4991 } 3033 4992 } 3034 4993 3035 - static int create_dev_resources(struct mlx5_ib_resources *devr) 4994 + static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) 3036 4995 { 4996 + struct mlx5_ib_resources *devr = &dev->devr; 3037 4997 struct ib_srq_init_attr attr; 3038 - struct mlx5_ib_dev *dev; 3039 4998 struct ib_device *ibdev; 3040 4999 struct ib_cq_init_attr cq_attr = {.cqe = 1}; 3041 5000 int port; 3042 5001 int ret = 0; 3043 5002 3044 - dev = container_of(devr, struct mlx5_ib_dev, devr); 3045 5003 ibdev = &dev->ib_dev; 5004 + 5005 + if (!MLX5_CAP_GEN(dev->mdev, xrc)) 5006 + return -EOPNOTSUPP; 3046 5007 3047 5008 mutex_init(&devr->mutex); 3048 5009 ··· 3073 5030 if (ret) 3074 5031 goto err_create_cq; 3075 5032 3076 - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); 3077 - if (IS_ERR(devr->x0)) { 3078 - ret = PTR_ERR(devr->x0); 5033 + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); 5034 + if (ret) 3079 5035 goto error2; 3080 - } 3081 - devr->x0->device = &dev->ib_dev; 3082 - devr->x0->inode = NULL; 3083 - atomic_set(&devr->x0->usecnt, 0); 3084 - mutex_init(&devr->x0->tgt_qp_mutex); 3085 - INIT_LIST_HEAD(&devr->x0->tgt_qp_list); 3086 5036 3087 - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); 3088 - if (IS_ERR(devr->x1)) { 3089 - ret = PTR_ERR(devr->x1); 5037 + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); 5038 + if (ret) 3090 5039 goto error3; 3091 - } 3092 - devr->x1->device = &dev->ib_dev; 3093 - devr->x1->inode = NULL; 3094 - atomic_set(&devr->x1->usecnt, 0); 3095 - mutex_init(&devr->x1->tgt_qp_mutex); 3096 - INIT_LIST_HEAD(&devr->x1->tgt_qp_list); 3097 5040 3098 5041 memset(&attr, 0, sizeof(attr)); 3099 5042 attr.attr.max_sge = 1; 3100 5043 attr.attr.max_wr = 1; 3101 5044 attr.srq_type = IB_SRQT_XRC; 3102 5045 
attr.ext.cq = devr->c0; 3103 - attr.ext.xrc.xrcd = devr->x0; 3104 5046 3105 5047 devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); 3106 5048 if (!devr->s0) { ··· 3096 5068 devr->s0->device = &dev->ib_dev; 3097 5069 devr->s0->pd = devr->p0; 3098 5070 devr->s0->srq_type = IB_SRQT_XRC; 3099 - devr->s0->ext.xrc.xrcd = devr->x0; 3100 5071 devr->s0->ext.cq = devr->c0; 3101 5072 ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); 3102 5073 if (ret) 3103 5074 goto err_create; 3104 5075 3105 - atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); 3106 5076 atomic_inc(&devr->s0->ext.cq->usecnt); 3107 5077 atomic_inc(&devr->p0->usecnt); 3108 5078 atomic_set(&devr->s0->usecnt, 0); ··· 3142 5116 err_create: 3143 5117 kfree(devr->s0); 3144 5118 error4: 3145 - mlx5_ib_dealloc_xrcd(devr->x1, NULL); 5119 + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); 3146 5120 error3: 3147 - mlx5_ib_dealloc_xrcd(devr->x0, NULL); 5121 + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); 3148 5122 error2: 3149 5123 mlx5_ib_destroy_cq(devr->c0, NULL); 3150 5124 err_create_cq: ··· 3156 5130 return ret; 3157 5131 } 3158 5132 3159 - static void destroy_dev_resources(struct mlx5_ib_resources *devr) 5133 + static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) 3160 5134 { 5135 + struct mlx5_ib_resources *devr = &dev->devr; 3161 5136 int port; 3162 5137 3163 5138 mlx5_ib_destroy_srq(devr->s1, NULL); 3164 5139 kfree(devr->s1); 3165 5140 mlx5_ib_destroy_srq(devr->s0, NULL); 3166 5141 kfree(devr->s0); 3167 - mlx5_ib_dealloc_xrcd(devr->x0, NULL); 3168 - mlx5_ib_dealloc_xrcd(devr->x1, NULL); 5142 + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); 5143 + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); 3169 5144 mlx5_ib_destroy_cq(devr->c0, NULL); 3170 5145 kfree(devr->c0); 3171 5146 mlx5_ib_dealloc_pd(devr->p0, NULL); ··· 3359 5332 mlx5_nic_vport_disable_roce(dev->mdev); 3360 5333 } 3361 5334 3362 - struct mlx5_ib_counter { 3363 - const char *name; 3364 - size_t offset; 3365 - }; 3366 - 3367 - #define 
INIT_Q_COUNTER(_name) \ 3368 - { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} 3369 - 3370 - static const struct mlx5_ib_counter basic_q_cnts[] = { 3371 - INIT_Q_COUNTER(rx_write_requests), 3372 - INIT_Q_COUNTER(rx_read_requests), 3373 - INIT_Q_COUNTER(rx_atomic_requests), 3374 - INIT_Q_COUNTER(out_of_buffer), 3375 - }; 3376 - 3377 - static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { 3378 - INIT_Q_COUNTER(out_of_sequence), 3379 - }; 3380 - 3381 - static const struct mlx5_ib_counter retrans_q_cnts[] = { 3382 - INIT_Q_COUNTER(duplicate_request), 3383 - INIT_Q_COUNTER(rnr_nak_retry_err), 3384 - INIT_Q_COUNTER(packet_seq_err), 3385 - INIT_Q_COUNTER(implied_nak_seq_err), 3386 - INIT_Q_COUNTER(local_ack_timeout_err), 3387 - }; 3388 - 3389 - #define INIT_CONG_COUNTER(_name) \ 3390 - { .name = #_name, .offset = \ 3391 - MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} 3392 - 3393 - static const struct mlx5_ib_counter cong_cnts[] = { 3394 - INIT_CONG_COUNTER(rp_cnp_ignored), 3395 - INIT_CONG_COUNTER(rp_cnp_handled), 3396 - INIT_CONG_COUNTER(np_ecn_marked_roce_packets), 3397 - INIT_CONG_COUNTER(np_cnp_sent), 3398 - }; 3399 - 3400 - static const struct mlx5_ib_counter extended_err_cnts[] = { 3401 - INIT_Q_COUNTER(resp_local_length_error), 3402 - INIT_Q_COUNTER(resp_cqe_error), 3403 - INIT_Q_COUNTER(req_cqe_error), 3404 - INIT_Q_COUNTER(req_remote_invalid_request), 3405 - INIT_Q_COUNTER(req_remote_access_errors), 3406 - INIT_Q_COUNTER(resp_remote_access_errors), 3407 - INIT_Q_COUNTER(resp_cqe_flush_error), 3408 - INIT_Q_COUNTER(req_cqe_flush_error), 3409 - }; 3410 - 3411 - static const struct mlx5_ib_counter roce_accl_cnts[] = { 3412 - INIT_Q_COUNTER(roce_adp_retrans), 3413 - INIT_Q_COUNTER(roce_adp_retrans_to), 3414 - INIT_Q_COUNTER(roce_slow_restart), 3415 - INIT_Q_COUNTER(roce_slow_restart_cnps), 3416 - INIT_Q_COUNTER(roce_slow_restart_trans), 3417 - }; 3418 - 3419 - #define INIT_EXT_PPCNT_COUNTER(_name) \ 3420 - { .name = #_name, 
.offset = \ 3421 - MLX5_BYTE_OFF(ppcnt_reg, \ 3422 - counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} 3423 - 3424 - static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { 3425 - INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), 3426 - }; 3427 - 3428 - static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) 3429 - { 3430 - return MLX5_ESWITCH_MANAGER(mdev) && 3431 - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == 3432 - MLX5_ESWITCH_OFFLOADS; 3433 - } 3434 - 3435 - static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 3436 - { 3437 - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 3438 - int num_cnt_ports; 3439 - int i; 3440 - 3441 - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; 3442 - 3443 - MLX5_SET(dealloc_q_counter_in, in, opcode, 3444 - MLX5_CMD_OP_DEALLOC_Q_COUNTER); 3445 - 3446 - for (i = 0; i < num_cnt_ports; i++) { 3447 - if (dev->port[i].cnts.set_id) { 3448 - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 3449 - dev->port[i].cnts.set_id); 3450 - mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 3451 - } 3452 - kfree(dev->port[i].cnts.names); 3453 - kfree(dev->port[i].cnts.offsets); 3454 - } 3455 - } 3456 - 3457 - static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, 3458 - struct mlx5_ib_counters *cnts) 3459 - { 3460 - u32 num_counters; 3461 - 3462 - num_counters = ARRAY_SIZE(basic_q_cnts); 3463 - 3464 - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) 3465 - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); 3466 - 3467 - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) 3468 - num_counters += ARRAY_SIZE(retrans_q_cnts); 3469 - 3470 - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) 3471 - num_counters += ARRAY_SIZE(extended_err_cnts); 3472 - 3473 - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) 3474 - num_counters += ARRAY_SIZE(roce_accl_cnts); 3475 - 3476 - cnts->num_q_counters = num_counters; 3477 - 3478 - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3479 - cnts->num_cong_counters = 
ARRAY_SIZE(cong_cnts); 3480 - num_counters += ARRAY_SIZE(cong_cnts); 3481 - } 3482 - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 3483 - cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); 3484 - num_counters += ARRAY_SIZE(ext_ppcnt_cnts); 3485 - } 3486 - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); 3487 - if (!cnts->names) 3488 - return -ENOMEM; 3489 - 3490 - cnts->offsets = kcalloc(num_counters, 3491 - sizeof(cnts->offsets), GFP_KERNEL); 3492 - if (!cnts->offsets) 3493 - goto err_names; 3494 - 3495 - return 0; 3496 - 3497 - err_names: 3498 - kfree(cnts->names); 3499 - cnts->names = NULL; 3500 - return -ENOMEM; 3501 - } 3502 - 3503 - static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, 3504 - const char **names, 3505 - size_t *offsets) 3506 - { 3507 - int i; 3508 - int j = 0; 3509 - 3510 - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { 3511 - names[j] = basic_q_cnts[i].name; 3512 - offsets[j] = basic_q_cnts[i].offset; 3513 - } 3514 - 3515 - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { 3516 - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { 3517 - names[j] = out_of_seq_q_cnts[i].name; 3518 - offsets[j] = out_of_seq_q_cnts[i].offset; 3519 - } 3520 - } 3521 - 3522 - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 3523 - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { 3524 - names[j] = retrans_q_cnts[i].name; 3525 - offsets[j] = retrans_q_cnts[i].offset; 3526 - } 3527 - } 3528 - 3529 - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { 3530 - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { 3531 - names[j] = extended_err_cnts[i].name; 3532 - offsets[j] = extended_err_cnts[i].offset; 3533 - } 3534 - } 3535 - 3536 - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { 3537 - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { 3538 - names[j] = roce_accl_cnts[i].name; 3539 - offsets[j] = roce_accl_cnts[i].offset; 3540 - } 3541 - } 3542 - 3543 - if 
(MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3544 - for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { 3545 - names[j] = cong_cnts[i].name; 3546 - offsets[j] = cong_cnts[i].offset; 3547 - } 3548 - } 3549 - 3550 - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 3551 - for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { 3552 - names[j] = ext_ppcnt_cnts[i].name; 3553 - offsets[j] = ext_ppcnt_cnts[i].offset; 3554 - } 3555 - } 3556 - } 3557 - 3558 - static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 3559 - { 3560 - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 3561 - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 3562 - int num_cnt_ports; 3563 - int err = 0; 3564 - int i; 3565 - bool is_shared; 3566 - 3567 - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); 3568 - is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; 3569 - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; 3570 - 3571 - for (i = 0; i < num_cnt_ports; i++) { 3572 - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); 3573 - if (err) 3574 - goto err_alloc; 3575 - 3576 - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, 3577 - dev->port[i].cnts.offsets); 3578 - 3579 - MLX5_SET(alloc_q_counter_in, in, uid, 3580 - is_shared ? MLX5_SHARED_RESOURCE_UID : 0); 3581 - 3582 - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 3583 - if (err) { 3584 - mlx5_ib_warn(dev, 3585 - "couldn't allocate queue counter for port %d, err %d\n", 3586 - i + 1, err); 3587 - goto err_alloc; 3588 - } 3589 - 3590 - dev->port[i].cnts.set_id = 3591 - MLX5_GET(alloc_q_counter_out, out, counter_set_id); 3592 - } 3593 - return 0; 3594 - 3595 - err_alloc: 3596 - mlx5_ib_dealloc_counters(dev); 3597 - return err; 3598 - } 3599 - 3600 - static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, 3601 - u8 port_num) 3602 - { 3603 - return is_mdev_switchdev_mode(dev->mdev) ? 
&dev->port[0].cnts : 3604 - &dev->port[port_num].cnts; 3605 - } 3606 - 3607 - /** 3608 - * mlx5_ib_get_counters_id - Returns counters id to use for device+port 3609 - * @dev: Pointer to mlx5 IB device 3610 - * @port_num: Zero based port number 3611 - * 3612 - * mlx5_ib_get_counters_id() Returns counters set id to use for given 3613 - * device port combination in switchdev and non switchdev mode of the 3614 - * parent device. 3615 - */ 3616 - u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) 3617 - { 3618 - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); 3619 - 3620 - return cnts->set_id; 3621 - } 3622 - 3623 - static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 3624 - u8 port_num) 3625 - { 3626 - struct mlx5_ib_dev *dev = to_mdev(ibdev); 3627 - const struct mlx5_ib_counters *cnts; 3628 - bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); 3629 - 3630 - if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) 3631 - return NULL; 3632 - 3633 - cnts = get_counters(dev, port_num - 1); 3634 - 3635 - return rdma_alloc_hw_stats_struct(cnts->names, 3636 - cnts->num_q_counters + 3637 - cnts->num_cong_counters + 3638 - cnts->num_ext_ppcnt_counters, 3639 - RDMA_HW_STATS_DEFAULT_LIFESPAN); 3640 - } 3641 - 3642 - static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, 3643 - const struct mlx5_ib_counters *cnts, 3644 - struct rdma_hw_stats *stats, 3645 - u16 set_id) 3646 - { 3647 - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; 3648 - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; 3649 - __be32 val; 3650 - int ret, i; 3651 - 3652 - MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); 3653 - MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); 3654 - ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); 3655 - if (ret) 3656 - return ret; 3657 - 3658 - for (i = 0; i < cnts->num_q_counters; i++) { 3659 - val = *(__be32 *)((void *)out + cnts->offsets[i]); 3660 - 
stats->value[i] = (u64)be32_to_cpu(val); 3661 - } 3662 - 3663 - return 0; 3664 - } 3665 - 3666 - static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, 3667 - const struct mlx5_ib_counters *cnts, 3668 - struct rdma_hw_stats *stats) 3669 - { 3670 - int offset = cnts->num_q_counters + cnts->num_cong_counters; 3671 - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); 3672 - int ret, i; 3673 - void *out; 3674 - 3675 - out = kvzalloc(sz, GFP_KERNEL); 3676 - if (!out) 3677 - return -ENOMEM; 3678 - 3679 - ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); 3680 - if (ret) 3681 - goto free; 3682 - 3683 - for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) 3684 - stats->value[i + offset] = 3685 - be64_to_cpup((__be64 *)(out + 3686 - cnts->offsets[i + offset])); 3687 - free: 3688 - kvfree(out); 3689 - return ret; 3690 - } 3691 - 3692 - static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 3693 - struct rdma_hw_stats *stats, 3694 - u8 port_num, int index) 3695 - { 3696 - struct mlx5_ib_dev *dev = to_mdev(ibdev); 3697 - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); 3698 - struct mlx5_core_dev *mdev; 3699 - int ret, num_counters; 3700 - u8 mdev_port_num; 3701 - 3702 - if (!stats) 3703 - return -EINVAL; 3704 - 3705 - num_counters = cnts->num_q_counters + 3706 - cnts->num_cong_counters + 3707 - cnts->num_ext_ppcnt_counters; 3708 - 3709 - /* q_counters are per IB device, query the master mdev */ 3710 - ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); 3711 - if (ret) 3712 - return ret; 3713 - 3714 - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 3715 - ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); 3716 - if (ret) 3717 - return ret; 3718 - } 3719 - 3720 - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3721 - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, 3722 - &mdev_port_num); 3723 - if (!mdev) { 3724 - /* If port is not affiliated yet, its in down state 3725 - * which doesn't have any 
counters yet, so it would be 3726 - * zero. So no need to read from the HCA. 3727 - */ 3728 - goto done; 3729 - } 3730 - ret = mlx5_lag_query_cong_counters(dev->mdev, 3731 - stats->value + 3732 - cnts->num_q_counters, 3733 - cnts->num_cong_counters, 3734 - cnts->offsets + 3735 - cnts->num_q_counters); 3736 - 3737 - mlx5_ib_put_native_port_mdev(dev, port_num); 3738 - if (ret) 3739 - return ret; 3740 - } 3741 - 3742 - done: 3743 - return num_counters; 3744 - } 3745 - 3746 - static struct rdma_hw_stats * 3747 - mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) 3748 - { 3749 - struct mlx5_ib_dev *dev = to_mdev(counter->device); 3750 - const struct mlx5_ib_counters *cnts = 3751 - get_counters(dev, counter->port - 1); 3752 - 3753 - return rdma_alloc_hw_stats_struct(cnts->names, 3754 - cnts->num_q_counters + 3755 - cnts->num_cong_counters + 3756 - cnts->num_ext_ppcnt_counters, 3757 - RDMA_HW_STATS_DEFAULT_LIFESPAN); 3758 - } 3759 - 3760 - static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) 3761 - { 3762 - struct mlx5_ib_dev *dev = to_mdev(counter->device); 3763 - const struct mlx5_ib_counters *cnts = 3764 - get_counters(dev, counter->port - 1); 3765 - 3766 - return mlx5_ib_query_q_counters(dev->mdev, cnts, 3767 - counter->stats, counter->id); 3768 - } 3769 - 3770 - static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) 3771 - { 3772 - struct mlx5_ib_dev *dev = to_mdev(counter->device); 3773 - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 3774 - 3775 - if (!counter->id) 3776 - return 0; 3777 - 3778 - MLX5_SET(dealloc_q_counter_in, in, opcode, 3779 - MLX5_CMD_OP_DEALLOC_Q_COUNTER); 3780 - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); 3781 - return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 3782 - } 3783 - 3784 - static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, 3785 - struct ib_qp *qp) 3786 - { 3787 - struct mlx5_ib_dev *dev = to_mdev(qp->device); 3788 - int err; 3789 - 3790 - if 
(!counter->id) { 3791 - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 3792 - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 3793 - 3794 - MLX5_SET(alloc_q_counter_in, in, opcode, 3795 - MLX5_CMD_OP_ALLOC_Q_COUNTER); 3796 - MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); 3797 - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 3798 - if (err) 3799 - return err; 3800 - counter->id = 3801 - MLX5_GET(alloc_q_counter_out, out, counter_set_id); 3802 - } 3803 - 3804 - err = mlx5_ib_qp_set_counter(qp, counter); 3805 - if (err) 3806 - goto fail_set_counter; 3807 - 3808 - return 0; 3809 - 3810 - fail_set_counter: 3811 - mlx5_ib_counter_dealloc(counter); 3812 - counter->id = 0; 3813 - 3814 - return err; 3815 - } 3816 - 3817 - static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) 3818 - { 3819 - return mlx5_ib_qp_set_counter(qp, NULL); 3820 - } 3821 - 3822 5335 static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, 3823 5336 enum rdma_netdev_t type, 3824 5337 struct rdma_netdev_alloc_params *params) ··· 3367 5800 return -EOPNOTSUPP; 3368 5801 3369 5802 return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); 3370 - } 3371 - 3372 - static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) 3373 - { 3374 - if (!dev->delay_drop.dir_debugfs) 3375 - return; 3376 - debugfs_remove_recursive(dev->delay_drop.dir_debugfs); 3377 - dev->delay_drop.dir_debugfs = NULL; 3378 - } 3379 - 3380 - static void cancel_delay_drop(struct mlx5_ib_dev *dev) 3381 - { 3382 - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 3383 - return; 3384 - 3385 - cancel_work_sync(&dev->delay_drop.delay_drop_work); 3386 - delay_drop_debugfs_cleanup(dev); 3387 5803 } 3388 5804 3389 5805 static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, ··· 3407 5857 .write = delay_drop_timeout_write, 3408 5858 .read = delay_drop_timeout_read, 3409 5859 }; 3410 - 3411 - static void delay_drop_debugfs_init(struct 
mlx5_ib_dev *dev) 3412 - { 3413 - struct dentry *root; 3414 - 3415 - if (!mlx5_debugfs_root) 3416 - return; 3417 - 3418 - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); 3419 - dev->delay_drop.dir_debugfs = root; 3420 - 3421 - debugfs_create_atomic_t("num_timeout_events", 0400, root, 3422 - &dev->delay_drop.events_cnt); 3423 - debugfs_create_atomic_t("num_rqs", 0400, root, 3424 - &dev->delay_drop.rqs_cnt); 3425 - debugfs_create_file("timeout", 0600, root, &dev->delay_drop, 3426 - &fops_delay_drop_timeout); 3427 - } 3428 - 3429 - static void init_delay_drop(struct mlx5_ib_dev *dev) 3430 - { 3431 - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 3432 - return; 3433 - 3434 - mutex_init(&dev->delay_drop.lock); 3435 - dev->delay_drop.dev = dev; 3436 - dev->delay_drop.activate = false; 3437 - dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; 3438 - INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); 3439 - atomic_set(&dev->delay_drop.rqs_cnt, 0); 3440 - atomic_set(&dev->delay_drop.events_cnt, 0); 3441 - 3442 - delay_drop_debugfs_init(dev); 3443 - } 3444 5860 3445 5861 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, 3446 5862 struct mlx5_ib_multiport_info *mpi) ··· 3901 6385 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, 3902 6386 enum mlx5_ib_uapi_flow_action_flags)); 3903 6387 6388 + ADD_UVERBS_ATTRIBUTES_SIMPLE( 6389 + mlx5_ib_query_context, 6390 + UVERBS_OBJECT_DEVICE, 6391 + UVERBS_METHOD_QUERY_CONTEXT, 6392 + UVERBS_ATTR_PTR_OUT( 6393 + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, 6394 + UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, 6395 + dump_fill_mkey), 6396 + UA_MANDATORY)); 6397 + 3904 6398 static const struct uapi_definition mlx5_ib_defs[] = { 3905 6399 UAPI_DEF_CHAIN(mlx5_ib_devx_defs), 3906 6400 UAPI_DEF_CHAIN(mlx5_ib_flow_defs), 3907 6401 UAPI_DEF_CHAIN(mlx5_ib_qos_defs), 6402 + UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), 3908 6403 3909 6404 
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, 3910 6405 &mlx5_ib_flow_action), 3911 6406 UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), 6407 + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), 3912 6408 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, 3913 6409 UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), 3914 6410 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), 3915 6411 {} 3916 6412 }; 3917 - 3918 - static int mlx5_ib_read_counters(struct ib_counters *counters, 3919 - struct ib_counters_read_attr *read_attr, 3920 - struct uverbs_attr_bundle *attrs) 3921 - { 3922 - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 3923 - struct mlx5_read_counters_attr mread_attr = {}; 3924 - struct mlx5_ib_flow_counters_desc *desc; 3925 - int ret, i; 3926 - 3927 - mutex_lock(&mcounters->mcntrs_mutex); 3928 - if (mcounters->cntrs_max_index > read_attr->ncounters) { 3929 - ret = -EINVAL; 3930 - goto err_bound; 3931 - } 3932 - 3933 - mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), 3934 - GFP_KERNEL); 3935 - if (!mread_attr.out) { 3936 - ret = -ENOMEM; 3937 - goto err_bound; 3938 - } 3939 - 3940 - mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; 3941 - mread_attr.flags = read_attr->flags; 3942 - ret = mcounters->read_counters(counters->device, &mread_attr); 3943 - if (ret) 3944 - goto err_read; 3945 - 3946 - /* do the pass over the counters data array to assign according to the 3947 - * descriptions and indexing pairs 3948 - */ 3949 - desc = mcounters->counters_data; 3950 - for (i = 0; i < mcounters->ncounters; i++) 3951 - read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; 3952 - 3953 - err_read: 3954 - kfree(mread_attr.out); 3955 - err_bound: 3956 - mutex_unlock(&mcounters->mcntrs_mutex); 3957 - return ret; 3958 - } 3959 - 3960 - static int mlx5_ib_destroy_counters(struct ib_counters *counters) 3961 - { 3962 - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 3963 - 3964 - 
counters_clear_description(counters); 3965 - if (mcounters->hw_cntrs_hndl) 3966 - mlx5_fc_destroy(to_mdev(counters->device)->mdev, 3967 - mcounters->hw_cntrs_hndl); 3968 - 3969 - kfree(mcounters); 3970 - 3971 - return 0; 3972 - } 3973 - 3974 - static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, 3975 - struct uverbs_attr_bundle *attrs) 3976 - { 3977 - struct mlx5_ib_mcounters *mcounters; 3978 - 3979 - mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); 3980 - if (!mcounters) 3981 - return ERR_PTR(-ENOMEM); 3982 - 3983 - mutex_init(&mcounters->mcntrs_mutex); 3984 - 3985 - return &mcounters->ibcntrs; 3986 - } 3987 6413 3988 6414 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) 3989 6415 { ··· 4005 6547 return -ENOMEM; 4006 6548 } 4007 6549 4008 - static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) 6550 + static int mlx5_ib_enable_driver(struct ib_device *dev) 4009 6551 { 4010 - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); 6552 + struct mlx5_ib_dev *mdev = to_mdev(dev); 6553 + int ret; 4011 6554 4012 - if (!dev->flow_db) 4013 - return -ENOMEM; 6555 + ret = mlx5_ib_test_wc(mdev); 6556 + mlx5_ib_dbg(mdev, "Write-Combining %s", 6557 + mdev->wc_support ? 
"supported" : "not supported"); 4014 6558 4015 - mutex_init(&dev->flow_db->lock); 4016 - 4017 - return 0; 4018 - } 4019 - 4020 - static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) 4021 - { 4022 - kfree(dev->flow_db); 6559 + return ret; 4023 6560 } 4024 6561 4025 6562 static const struct ib_device_ops mlx5_ib_dev_ops = { ··· 4030 6577 .attach_mcast = mlx5_ib_mcg_attach, 4031 6578 .check_mr_status = mlx5_ib_check_mr_status, 4032 6579 .create_ah = mlx5_ib_create_ah, 4033 - .create_counters = mlx5_ib_create_counters, 4034 6580 .create_cq = mlx5_ib_create_cq, 4035 - .create_flow = mlx5_ib_create_flow, 4036 6581 .create_qp = mlx5_ib_create_qp, 4037 6582 .create_srq = mlx5_ib_create_srq, 4038 6583 .dealloc_pd = mlx5_ib_dealloc_pd, ··· 4038 6587 .del_gid = mlx5_ib_del_gid, 4039 6588 .dereg_mr = mlx5_ib_dereg_mr, 4040 6589 .destroy_ah = mlx5_ib_destroy_ah, 4041 - .destroy_counters = mlx5_ib_destroy_counters, 4042 6590 .destroy_cq = mlx5_ib_destroy_cq, 4043 - .destroy_flow = mlx5_ib_destroy_flow, 4044 - .destroy_flow_action = mlx5_ib_destroy_flow_action, 4045 6591 .destroy_qp = mlx5_ib_destroy_qp, 4046 6592 .destroy_srq = mlx5_ib_destroy_srq, 4047 6593 .detach_mcast = mlx5_ib_mcg_detach, ··· 4046 6598 .drain_rq = mlx5_ib_drain_rq, 4047 6599 .drain_sq = mlx5_ib_drain_sq, 4048 6600 .enable_driver = mlx5_ib_enable_driver, 4049 - .fill_res_entry = mlx5_ib_fill_res_entry, 4050 - .fill_stat_entry = mlx5_ib_fill_stat_entry, 4051 6601 .get_dev_fw_str = get_dev_fw_str, 4052 6602 .get_dma_mr = mlx5_ib_get_dma_mr, 4053 6603 .get_link_layer = mlx5_ib_port_link_layer, ··· 4069 6623 .query_pkey = mlx5_ib_query_pkey, 4070 6624 .query_qp = mlx5_ib_query_qp, 4071 6625 .query_srq = mlx5_ib_query_srq, 4072 - .read_counters = mlx5_ib_read_counters, 6626 + .query_ucontext = mlx5_ib_query_ucontext, 4073 6627 .reg_user_mr = mlx5_ib_reg_user_mr, 4074 6628 .req_notify_cq = mlx5_ib_arm_cq, 4075 6629 .rereg_user_mr = mlx5_ib_rereg_user_mr, 4076 6630 .resize_cq = mlx5_ib_resize_cq, 4077 
6631 4078 6632 INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), 6633 + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), 4079 6634 INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), 4080 6635 INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), 4081 6636 INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), 4082 6637 INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), 4083 - }; 4084 - 4085 - static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { 4086 - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, 4087 - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, 4088 6638 }; 4089 6639 4090 6640 static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { ··· 4103 6661 static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { 4104 6662 .alloc_xrcd = mlx5_ib_alloc_xrcd, 4105 6663 .dealloc_xrcd = mlx5_ib_dealloc_xrcd, 6664 + 6665 + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), 4106 6666 }; 4107 6667 4108 6668 static const struct ib_device_ops mlx5_ib_dev_dm_ops = { ··· 4213 6769 MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) 4214 6770 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); 4215 6771 4216 - if (mlx5_accel_ipsec_device_caps(dev->mdev) & 4217 - MLX5_ACCEL_IPSEC_CAP_DEVICE) 4218 - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); 4219 6772 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); 4220 6773 4221 6774 if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) ··· 4270 6829 .modify_wq = mlx5_ib_modify_wq, 4271 6830 }; 4272 6831 4273 - static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) 4274 - { 4275 - u8 port_num; 4276 - 4277 - dev->ib_dev.uverbs_ex_cmd_mask |= 4278 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 4279 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 4280 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 4281 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 4282 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 4283 - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); 4284 - 4285 - port_num 
= mlx5_core_native_port_num(dev->mdev) - 1; 4286 - 4287 - /* Register only for native ports */ 4288 - return mlx5_add_netdev_notifier(dev, port_num); 4289 - } 4290 - 4291 - static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) 4292 - { 4293 - u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4294 - 4295 - mlx5_remove_netdev_notifier(dev, port_num); 4296 - } 4297 - 4298 - static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) 6832 + static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) 4299 6833 { 4300 6834 struct mlx5_core_dev *mdev = dev->mdev; 4301 6835 enum rdma_link_layer ll; 4302 6836 int port_type_cap; 4303 - int err = 0; 4304 - 4305 - port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4306 - ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4307 - 4308 - if (ll == IB_LINK_LAYER_ETHERNET) 4309 - err = mlx5_ib_stage_common_roce_init(dev); 4310 - 4311 - return err; 4312 - } 4313 - 4314 - static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) 4315 - { 4316 - mlx5_ib_stage_common_roce_cleanup(dev); 4317 - } 4318 - 4319 - static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) 4320 - { 4321 - struct mlx5_core_dev *mdev = dev->mdev; 4322 - enum rdma_link_layer ll; 4323 - int port_type_cap; 6837 + u8 port_num = 0; 4324 6838 int err; 4325 6839 4326 6840 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4327 6841 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4328 6842 4329 6843 if (ll == IB_LINK_LAYER_ETHERNET) { 4330 - err = mlx5_ib_stage_common_roce_init(dev); 4331 - if (err) 6844 + dev->ib_dev.uverbs_ex_cmd_mask |= 6845 + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 6846 + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 6847 + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 6848 + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 6849 + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 6850 + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); 6851 + 6852 + port_num = mlx5_core_native_port_num(dev->mdev) - 
1; 6853 + 6854 + /* Register only for native ports */ 6855 + err = mlx5_add_netdev_notifier(dev, port_num); 6856 + if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev)) 6857 + /* 6858 + * We don't enable ETH interface for 6859 + * 1. IB representors 6860 + * 2. User disabled ROCE through devlink interface 6861 + */ 4332 6862 return err; 4333 6863 4334 6864 err = mlx5_enable_eth(dev); ··· 4309 6897 4310 6898 return 0; 4311 6899 cleanup: 4312 - mlx5_ib_stage_common_roce_cleanup(dev); 4313 - 6900 + mlx5_remove_netdev_notifier(dev, port_num); 4314 6901 return err; 4315 6902 } 4316 6903 4317 - static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) 6904 + static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) 4318 6905 { 4319 6906 struct mlx5_core_dev *mdev = dev->mdev; 4320 6907 enum rdma_link_layer ll; 4321 6908 int port_type_cap; 6909 + u8 port_num; 4322 6910 4323 6911 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4324 6912 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4325 6913 4326 6914 if (ll == IB_LINK_LAYER_ETHERNET) { 4327 - mlx5_disable_eth(dev); 4328 - mlx5_ib_stage_common_roce_cleanup(dev); 6915 + if (!dev->is_rep) 6916 + mlx5_disable_eth(dev); 6917 + 6918 + port_num = mlx5_core_native_port_num(dev->mdev) - 1; 6919 + mlx5_remove_netdev_notifier(dev, port_num); 4329 6920 } 4330 - } 4331 - 4332 - static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) 4333 - { 4334 - return create_dev_resources(&dev->devr); 4335 - } 4336 - 4337 - static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) 4338 - { 4339 - destroy_dev_resources(&dev->devr); 4340 - } 4341 - 4342 - static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) 4343 - { 4344 - return mlx5_ib_odp_init_one(dev); 4345 - } 4346 - 4347 - static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) 4348 - { 4349 - mlx5_ib_odp_cleanup_one(dev); 4350 - } 4351 - 4352 - static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { 4353 - .alloc_hw_stats = 
mlx5_ib_alloc_hw_stats, 4354 - .get_hw_stats = mlx5_ib_get_hw_stats, 4355 - .counter_bind_qp = mlx5_ib_counter_bind_qp, 4356 - .counter_unbind_qp = mlx5_ib_counter_unbind_qp, 4357 - .counter_dealloc = mlx5_ib_counter_dealloc, 4358 - .counter_alloc_stats = mlx5_ib_counter_alloc_stats, 4359 - .counter_update_stats = mlx5_ib_counter_update_stats, 4360 - }; 4361 - 4362 - static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) 4363 - { 4364 - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { 4365 - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); 4366 - 4367 - return mlx5_ib_alloc_counters(dev); 4368 - } 4369 - 4370 - return 0; 4371 - } 4372 - 4373 - static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) 4374 - { 4375 - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 4376 - mlx5_ib_dealloc_counters(dev); 4377 6921 } 4378 6922 4379 6923 static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) ··· 4391 7023 4392 7024 static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) 4393 7025 { 4394 - destroy_umrc_res(dev); 7026 + int err; 7027 + 7028 + err = mlx5_mr_cache_cleanup(dev); 7029 + if (err) 7030 + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 7031 + 7032 + if (dev->umrc.qp) 7033 + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); 7034 + if (dev->umrc.cq) 7035 + ib_free_cq(dev->umrc.cq); 7036 + if (dev->umrc.pd) 7037 + ib_dealloc_pd(dev->umrc.pd); 4395 7038 } 4396 7039 4397 7040 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ··· 4410 7031 ib_unregister_device(&dev->ib_dev); 4411 7032 } 4412 7033 7034 + enum { 7035 + MAX_UMR_WR = 128, 7036 + }; 7037 + 4413 7038 static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) 4414 7039 { 4415 - return create_umr_res(dev); 7040 + struct ib_qp_init_attr *init_attr = NULL; 7041 + struct ib_qp_attr *attr = NULL; 7042 + struct ib_pd *pd; 7043 + struct ib_cq *cq; 7044 + struct ib_qp *qp; 7045 + int ret; 7046 + 7047 + attr = kzalloc(sizeof(*attr), GFP_KERNEL); 7048 + 
init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 7049 + if (!attr || !init_attr) { 7050 + ret = -ENOMEM; 7051 + goto error_0; 7052 + } 7053 + 7054 + pd = ib_alloc_pd(&dev->ib_dev, 0); 7055 + if (IS_ERR(pd)) { 7056 + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 7057 + ret = PTR_ERR(pd); 7058 + goto error_0; 7059 + } 7060 + 7061 + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 7062 + if (IS_ERR(cq)) { 7063 + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 7064 + ret = PTR_ERR(cq); 7065 + goto error_2; 7066 + } 7067 + 7068 + init_attr->send_cq = cq; 7069 + init_attr->recv_cq = cq; 7070 + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 7071 + init_attr->cap.max_send_wr = MAX_UMR_WR; 7072 + init_attr->cap.max_send_sge = 1; 7073 + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 7074 + init_attr->port_num = 1; 7075 + qp = mlx5_ib_create_qp(pd, init_attr, NULL); 7076 + if (IS_ERR(qp)) { 7077 + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 7078 + ret = PTR_ERR(qp); 7079 + goto error_3; 7080 + } 7081 + qp->device = &dev->ib_dev; 7082 + qp->real_qp = qp; 7083 + qp->uobject = NULL; 7084 + qp->qp_type = MLX5_IB_QPT_REG_UMR; 7085 + qp->send_cq = init_attr->send_cq; 7086 + qp->recv_cq = init_attr->recv_cq; 7087 + 7088 + attr->qp_state = IB_QPS_INIT; 7089 + attr->port_num = 1; 7090 + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 7091 + IB_QP_PORT, NULL); 7092 + if (ret) { 7093 + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 7094 + goto error_4; 7095 + } 7096 + 7097 + memset(attr, 0, sizeof(*attr)); 7098 + attr->qp_state = IB_QPS_RTR; 7099 + attr->path_mtu = IB_MTU_256; 7100 + 7101 + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 7102 + if (ret) { 7103 + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 7104 + goto error_4; 7105 + } 7106 + 7107 + memset(attr, 0, sizeof(*attr)); 7108 + attr->qp_state = IB_QPS_RTS; 7109 + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 7110 + if (ret) { 7111 + mlx5_ib_dbg(dev, 
"Couldn't modify umr QP to rts\n"); 7112 + goto error_4; 7113 + } 7114 + 7115 + dev->umrc.qp = qp; 7116 + dev->umrc.cq = cq; 7117 + dev->umrc.pd = pd; 7118 + 7119 + sema_init(&dev->umrc.sem, MAX_UMR_WR); 7120 + ret = mlx5_mr_cache_init(dev); 7121 + if (ret) { 7122 + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 7123 + goto error_4; 7124 + } 7125 + 7126 + kfree(attr); 7127 + kfree(init_attr); 7128 + 7129 + return 0; 7130 + 7131 + error_4: 7132 + mlx5_ib_destroy_qp(qp, NULL); 7133 + dev->umrc.qp = NULL; 7134 + 7135 + error_3: 7136 + ib_free_cq(cq); 7137 + dev->umrc.cq = NULL; 7138 + 7139 + error_2: 7140 + ib_dealloc_pd(pd); 7141 + dev->umrc.pd = NULL; 7142 + 7143 + error_0: 7144 + kfree(attr); 7145 + kfree(init_attr); 7146 + return ret; 4416 7147 } 4417 7148 4418 7149 static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) 4419 7150 { 4420 - init_delay_drop(dev); 7151 + struct dentry *root; 4421 7152 7153 + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 7154 + return 0; 7155 + 7156 + mutex_init(&dev->delay_drop.lock); 7157 + dev->delay_drop.dev = dev; 7158 + dev->delay_drop.activate = false; 7159 + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; 7160 + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); 7161 + atomic_set(&dev->delay_drop.rqs_cnt, 0); 7162 + atomic_set(&dev->delay_drop.events_cnt, 0); 7163 + 7164 + if (!mlx5_debugfs_root) 7165 + return 0; 7166 + 7167 + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); 7168 + dev->delay_drop.dir_debugfs = root; 7169 + 7170 + debugfs_create_atomic_t("num_timeout_events", 0400, root, 7171 + &dev->delay_drop.events_cnt); 7172 + debugfs_create_atomic_t("num_rqs", 0400, root, 7173 + &dev->delay_drop.rqs_cnt); 7174 + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, 7175 + &fops_delay_drop_timeout); 4422 7176 return 0; 4423 7177 } 4424 7178 4425 7179 static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) 4426 
7180 { 4427 - cancel_delay_drop(dev); 7181 + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 7182 + return; 7183 + 7184 + cancel_work_sync(&dev->delay_drop.delay_drop_work); 7185 + if (!dev->delay_drop.dir_debugfs) 7186 + return; 7187 + 7188 + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); 7189 + dev->delay_drop.dir_debugfs = NULL; 4428 7190 } 4429 7191 4430 7192 static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) ··· 4578 7058 static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) 4579 7059 { 4580 7060 mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); 4581 - } 4582 - 4583 - static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) 4584 - { 4585 - int uid; 4586 - 4587 - uid = mlx5_ib_devx_create(dev, false); 4588 - if (uid > 0) { 4589 - dev->devx_whitelist_uid = uid; 4590 - mlx5_ib_devx_init_event_table(dev); 4591 - } 4592 - 4593 - return 0; 4594 - } 4595 - static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) 4596 - { 4597 - if (dev->devx_whitelist_uid) { 4598 - mlx5_ib_devx_cleanup_event_table(dev); 4599 - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); 4600 - } 4601 - } 4602 - 4603 - int mlx5_ib_enable_driver(struct ib_device *dev) 4604 - { 4605 - struct mlx5_ib_dev *mdev = to_mdev(dev); 4606 - int ret; 4607 - 4608 - ret = mlx5_ib_test_wc(mdev); 4609 - mlx5_ib_dbg(mdev, "Write-Combining %s", 4610 - mdev->wc_support ? 
"supported" : "not supported"); 4611 - 4612 - return ret; 4613 7061 } 4614 7062 4615 7063 void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ··· 4627 7139 STAGE_CREATE(MLX5_IB_STAGE_INIT, 4628 7140 mlx5_ib_stage_init_init, 4629 7141 mlx5_ib_stage_init_cleanup), 4630 - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, 4631 - mlx5_ib_stage_flow_db_init, 4632 - mlx5_ib_stage_flow_db_cleanup), 7142 + STAGE_CREATE(MLX5_IB_STAGE_FS, 7143 + mlx5_ib_fs_init, 7144 + mlx5_ib_fs_cleanup), 4633 7145 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 4634 7146 mlx5_ib_stage_caps_init, 4635 7147 mlx5_ib_stage_caps_cleanup), ··· 4637 7149 mlx5_ib_stage_non_default_cb, 4638 7150 NULL), 4639 7151 STAGE_CREATE(MLX5_IB_STAGE_ROCE, 4640 - mlx5_ib_stage_roce_init, 4641 - mlx5_ib_stage_roce_cleanup), 7152 + mlx5_ib_roce_init, 7153 + mlx5_ib_roce_cleanup), 4642 7154 STAGE_CREATE(MLX5_IB_STAGE_QP, 4643 7155 mlx5_init_qp_table, 4644 7156 mlx5_cleanup_qp_table), ··· 4646 7158 mlx5_init_srq_table, 4647 7159 mlx5_cleanup_srq_table), 4648 7160 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4649 - mlx5_ib_stage_dev_res_init, 4650 - mlx5_ib_stage_dev_res_cleanup), 7161 + mlx5_ib_dev_res_init, 7162 + mlx5_ib_dev_res_cleanup), 4651 7163 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4652 7164 mlx5_ib_stage_dev_notifier_init, 4653 7165 mlx5_ib_stage_dev_notifier_cleanup), 4654 7166 STAGE_CREATE(MLX5_IB_STAGE_ODP, 4655 - mlx5_ib_stage_odp_init, 4656 - mlx5_ib_stage_odp_cleanup), 7167 + mlx5_ib_odp_init_one, 7168 + mlx5_ib_odp_cleanup_one), 4657 7169 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 4658 - mlx5_ib_stage_counters_init, 4659 - mlx5_ib_stage_counters_cleanup), 7170 + mlx5_ib_counters_init, 7171 + mlx5_ib_counters_cleanup), 4660 7172 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, 4661 7173 mlx5_ib_stage_cong_debugfs_init, 4662 7174 mlx5_ib_stage_cong_debugfs_cleanup), ··· 4670 7182 NULL, 4671 7183 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 4672 7184 STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, 4673 - mlx5_ib_stage_devx_init, 4674 - 
mlx5_ib_stage_devx_cleanup), 7185 + mlx5_ib_devx_init, 7186 + mlx5_ib_devx_cleanup), 4675 7187 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4676 7188 mlx5_ib_stage_ib_reg_init, 4677 7189 mlx5_ib_stage_ib_reg_cleanup), ··· 4681 7193 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 4682 7194 mlx5_ib_stage_delay_drop_init, 4683 7195 mlx5_ib_stage_delay_drop_cleanup), 7196 + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, 7197 + mlx5_ib_restrack_init, 7198 + NULL), 4684 7199 }; 4685 7200 4686 7201 const struct mlx5_ib_profile raw_eth_profile = { 4687 7202 STAGE_CREATE(MLX5_IB_STAGE_INIT, 4688 7203 mlx5_ib_stage_init_init, 4689 7204 mlx5_ib_stage_init_cleanup), 4690 - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, 4691 - mlx5_ib_stage_flow_db_init, 4692 - mlx5_ib_stage_flow_db_cleanup), 7205 + STAGE_CREATE(MLX5_IB_STAGE_FS, 7206 + mlx5_ib_fs_init, 7207 + mlx5_ib_fs_cleanup), 4693 7208 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 4694 7209 mlx5_ib_stage_caps_init, 4695 7210 mlx5_ib_stage_caps_cleanup), ··· 4700 7209 mlx5_ib_stage_raw_eth_non_default_cb, 4701 7210 NULL), 4702 7211 STAGE_CREATE(MLX5_IB_STAGE_ROCE, 4703 - mlx5_ib_stage_raw_eth_roce_init, 4704 - mlx5_ib_stage_raw_eth_roce_cleanup), 7212 + mlx5_ib_roce_init, 7213 + mlx5_ib_roce_cleanup), 4705 7214 STAGE_CREATE(MLX5_IB_STAGE_QP, 4706 7215 mlx5_init_qp_table, 4707 7216 mlx5_cleanup_qp_table), ··· 4709 7218 mlx5_init_srq_table, 4710 7219 mlx5_cleanup_srq_table), 4711 7220 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4712 - mlx5_ib_stage_dev_res_init, 4713 - mlx5_ib_stage_dev_res_cleanup), 7221 + mlx5_ib_dev_res_init, 7222 + mlx5_ib_dev_res_cleanup), 4714 7223 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4715 7224 mlx5_ib_stage_dev_notifier_init, 4716 7225 mlx5_ib_stage_dev_notifier_cleanup), 4717 7226 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 4718 - mlx5_ib_stage_counters_init, 4719 - mlx5_ib_stage_counters_cleanup), 7227 + mlx5_ib_counters_init, 7228 + mlx5_ib_counters_cleanup), 4720 7229 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, 4721 7230 
mlx5_ib_stage_cong_debugfs_init, 4722 7231 mlx5_ib_stage_cong_debugfs_cleanup), ··· 4730 7239 NULL, 4731 7240 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 4732 7241 STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, 4733 - mlx5_ib_stage_devx_init, 4734 - mlx5_ib_stage_devx_cleanup), 7242 + mlx5_ib_devx_init, 7243 + mlx5_ib_devx_cleanup), 4735 7244 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4736 7245 mlx5_ib_stage_ib_reg_init, 4737 7246 mlx5_ib_stage_ib_reg_cleanup), 4738 7247 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 4739 7248 mlx5_ib_stage_post_ib_reg_umr_init, 7249 + NULL), 7250 + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, 7251 + mlx5_ib_restrack_init, 4740 7252 NULL), 4741 7253 }; 4742 7254
+17 -92
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 3 + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 
31 4 */ 32 5 33 6 #ifndef MLX5_IB_H ··· 703 730 704 731 struct mlx5_ib_resources { 705 732 struct ib_cq *c0; 706 - struct ib_xrcd *x0; 707 - struct ib_xrcd *x1; 733 + u32 xrcdn0; 734 + u32 xrcdn1; 708 735 struct ib_pd *p0; 709 736 struct ib_srq *s0; 710 737 struct ib_srq *s1; ··· 805 832 806 833 enum mlx5_ib_stages { 807 834 MLX5_IB_STAGE_INIT, 808 - MLX5_IB_STAGE_FLOW_DB, 835 + MLX5_IB_STAGE_FS, 809 836 MLX5_IB_STAGE_CAPS, 810 837 MLX5_IB_STAGE_NON_DEFAULT_CB, 811 838 MLX5_IB_STAGE_ROCE, ··· 823 850 MLX5_IB_STAGE_IB_REG, 824 851 MLX5_IB_STAGE_POST_IB_REG_UMR, 825 852 MLX5_IB_STAGE_DELAY_DROP, 826 - MLX5_IB_STAGE_CLASS_ATTR, 853 + MLX5_IB_STAGE_RESTRACK, 827 854 MLX5_IB_STAGE_MAX, 828 855 }; 829 856 ··· 1051 1078 return container_of(core_qp, struct mlx5_ib_rwq, core_qp); 1052 1079 } 1053 1080 1054 - static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) 1055 - { 1056 - return container_of(mmkey, struct mlx5_ib_mr, mmkey); 1057 - } 1058 - 1059 1081 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) 1060 1082 { 1061 1083 return container_of(ibpd, struct mlx5_ib_pd, ibpd); ··· 1178 1210 struct ib_pd *pd, struct ib_udata *udata); 1179 1211 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 1180 1212 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1181 - u32 max_num_sg, struct ib_udata *udata); 1213 + u32 max_num_sg); 1182 1214 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 1183 1215 u32 max_num_sg, 1184 1216 u32 max_num_meta_sg); ··· 1192 1224 const struct ib_wc *in_wc, const struct ib_grh *in_grh, 1193 1225 const struct ib_mad *in, struct ib_mad *out, 1194 1226 size_t *out_mad_size, u16 *out_mad_pkey_index); 1195 - struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, 1196 - struct ib_udata *udata); 1197 - int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 1227 + int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 1228 + void 
mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 1198 1229 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); 1199 1230 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); 1200 1231 int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, ··· 1342 1375 u8 *native_port_num); 1343 1376 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, 1344 1377 u8 port_num); 1345 - int mlx5_ib_fill_res_entry(struct sk_buff *msg, 1346 - struct rdma_restrack_entry *res); 1347 - int mlx5_ib_fill_stat_entry(struct sk_buff *msg, 1348 - struct rdma_restrack_entry *res); 1349 1378 1350 1379 extern const struct uapi_definition mlx5_ib_devx_defs[]; 1351 1380 extern const struct uapi_definition mlx5_ib_flow_defs[]; 1352 1381 extern const struct uapi_definition mlx5_ib_qos_defs[]; 1382 + extern const struct uapi_definition mlx5_ib_std_types_defs[]; 1353 1383 1354 - #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 1355 - int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); 1356 - void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); 1357 - void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); 1358 - void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); 1359 - struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( 1360 - struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, 1361 - struct mlx5_flow_context *flow_context, 1362 - struct mlx5_flow_act *flow_act, u32 counter_id, 1363 - void *cmd_in, int inlen, int dest_id, int dest_type); 1364 - bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); 1365 - bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); 1366 - void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); 1367 - #else 1368 - static inline int 1369 - mlx5_ib_devx_create(struct mlx5_ib_dev *dev, 1370 - bool is_user) { return -EOPNOTSUPP; } 1371 - static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} 
1372 - static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} 1373 - static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} 1374 - static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, 1375 - int *dest_type) 1376 - { 1377 - return false; 1378 - } 1379 - static inline void 1380 - mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) 1381 - { 1382 - return; 1383 - }; 1384 - #endif 1385 1384 static inline void init_query_mad(struct ib_smp *mad) 1386 1385 { 1387 1386 mad->base_version = 1; 1388 1387 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; 1389 1388 mad->class_version = 1; 1390 1389 mad->method = IB_MGMT_METHOD_GET; 1391 - } 1392 - 1393 - static inline u8 convert_access(int acc) 1394 - { 1395 - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | 1396 - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | 1397 - (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | 1398 - (acc & IB_ACCESS_LOCAL_WRITE ? 
MLX5_PERM_LOCAL_WRITE : 0) | 1399 - MLX5_PERM_LOCAL_READ; 1400 1390 } 1401 1391 1402 1392 static inline int is_qp1(enum ib_qp_type qp_type) ··· 1442 1518 struct mlx5_bfreg_info *bfregi, u32 bfregn, 1443 1519 bool dyn_bfreg); 1444 1520 1445 - int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); 1446 - u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); 1447 - 1448 1521 static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, 1449 1522 bool do_modify_atomic, int access_flags) 1450 1523 { ··· 1454 1533 return false; 1455 1534 1456 1535 if (access_flags & IB_ACCESS_RELAXED_ORDERING && 1457 - (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) || 1458 - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))) 1536 + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 1537 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 1538 + return false; 1539 + 1540 + if (access_flags & IB_ACCESS_RELAXED_ORDERING && 1541 + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && 1542 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 1459 1543 return false; 1460 1544 1461 1545 return true; 1462 1546 } 1463 1547 1464 - int mlx5_ib_enable_driver(struct ib_device *dev); 1465 1548 int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); 1466 1549 1467 1550 static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
+1 -1
drivers/infiniband/hw/mlx5/mr.c
··· 1961 1961 } 1962 1962 1963 1963 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1964 - u32 max_num_sg, struct ib_udata *udata) 1964 + u32 max_num_sg) 1965 1965 { 1966 1966 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 1967 1967 }
+19 -9
drivers/infiniband/hw/mlx5/odp.c
··· 816 816 { 817 817 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 818 818 819 + lockdep_assert_held(&mr->dev->odp_srcu); 819 820 if (unlikely(io_virt < mr->mmkey.iova)) 820 821 return -EFAULT; 821 822 ··· 930 929 if (ret < 0) 931 930 goto srcu_unlock; 932 931 933 - /* 934 - * When prefetching a page, page fault is generated 935 - * in order to bring the page to the main memory. 936 - * In the current flow, page faults are being counted. 937 - */ 938 932 mlx5_update_odp_stats(mr, faults, ret); 939 933 940 934 npages += ret; ··· 1766 1770 { 1767 1771 struct prefetch_mr_work *work = 1768 1772 container_of(w, struct prefetch_mr_work, work); 1773 + struct mlx5_ib_dev *dev; 1769 1774 u32 bytes_mapped = 0; 1775 + int srcu_key; 1776 + int ret; 1770 1777 u32 i; 1771 1778 1772 - for (i = 0; i < work->num_sge; ++i) 1773 - pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, 1774 - work->frags[i].length, &bytes_mapped, 1775 - work->pf_flags); 1779 + /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ 1780 + WARN_ON(!work->num_sge); 1781 + dev = work->frags[0].mr->dev; 1782 + /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ 1783 + srcu_key = srcu_read_lock(&dev->odp_srcu); 1784 + for (i = 0; i < work->num_sge; ++i) { 1785 + ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, 1786 + work->frags[i].length, &bytes_mapped, 1787 + work->pf_flags); 1788 + if (ret <= 0) 1789 + continue; 1790 + mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); 1791 + } 1792 + srcu_read_unlock(&dev->odp_srcu, srcu_key); 1776 1793 1777 1794 destroy_prefetch_work(work); 1778 1795 } ··· 1841 1832 &bytes_mapped, pf_flags); 1842 1833 if (ret < 0) 1843 1834 goto out; 1835 + mlx5_update_odp_stats(mr, prefetch, ret); 1844 1836 } 1845 1837 ret = 0; 1846 1838
+25 -46
drivers/infiniband/hw/mlx5/qp.c
··· 38 38 #include <linux/mlx5/fs.h> 39 39 #include "mlx5_ib.h" 40 40 #include "ib_rep.h" 41 + #include "counters.h" 41 42 #include "cmd.h" 42 43 #include "qp.h" 43 44 #include "wr.h" ··· 2032 2031 switch (init_attr->qp_type) { 2033 2032 case IB_QPT_XRC_INI: 2034 2033 MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); 2035 - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); 2034 + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); 2036 2035 MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); 2037 2036 break; 2038 2037 default: 2039 2038 if (init_attr->srq) { 2040 - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); 2039 + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); 2041 2040 MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); 2042 2041 } else { 2043 - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); 2042 + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); 2044 2043 MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); 2045 2044 } 2046 2045 } ··· 2179 2178 MLX5_SET(qpc, qpc, no_sq, 1); 2180 2179 2181 2180 if (attr->srq) { 2182 - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); 2181 + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); 2183 2182 MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, 2184 2183 to_msrq(attr->srq)->msrq.srqn); 2185 2184 } else { 2186 - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); 2185 + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); 2187 2186 MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, 2188 2187 to_msrq(devr->s1)->msrq.srqn); 2189 2188 } ··· 3555 3554 switch (raw_qp_param->operation) { 3556 3555 case MLX5_CMD_OP_RST2INIT_QP: 3557 3556 rq_state = MLX5_RQC_STATE_RDY; 3558 - sq_state = MLX5_SQC_STATE_RDY; 3557 + sq_state = MLX5_SQC_STATE_RST; 3559 3558 break; 3560 3559 case MLX5_CMD_OP_2ERR_QP: 3561 3560 rq_state = MLX5_RQC_STATE_ERR; ··· 3567 3566 break; 3568 3567 case MLX5_CMD_OP_RTR2RTS_QP: 3569 3568 case MLX5_CMD_OP_RTS2RTS_QP: 3570 - if (raw_qp_param->set_mask == 3571 - MLX5_RAW_QP_RATE_LIMIT) { 3572 - modify_rq = 0; 3573 - sq_state = 
sq->state; 3574 - } else { 3575 - return raw_qp_param->set_mask ? -EINVAL : 0; 3576 - } 3569 + if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT) 3570 + return -EINVAL; 3571 + 3572 + modify_rq = 0; 3573 + sq_state = MLX5_SQC_STATE_RDY; 3577 3574 break; 3578 3575 case MLX5_CMD_OP_INIT2INIT_QP: 3579 3576 case MLX5_CMD_OP_INIT2RTR_QP: ··· 4113 4114 struct mlx5_ib_qp *qp = to_mqp(ibqp); 4114 4115 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4115 4116 enum ib_qp_state cur_state, new_state; 4116 - int err = 0; 4117 4117 int required = IB_QP_STATE; 4118 4118 void *dctc; 4119 + int err; 4119 4120 4120 4121 if (!(attr_mask & IB_QP_STATE)) 4121 4122 return -EINVAL; ··· 4207 4208 mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state); 4208 4209 return -EINVAL; 4209 4210 } 4210 - if (err) 4211 - qp->state = IB_QPS_ERR; 4212 - else 4213 - qp->state = new_state; 4214 - return err; 4211 + 4212 + qp->state = new_state; 4213 + return 0; 4215 4214 } 4216 4215 4217 4216 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ··· 4447 4450 [MLX5_SQ_STATE_NA] = IB_QPS_RESET, 4448 4451 }, 4449 4452 [MLX5_RQC_STATE_RDY] = { 4450 - [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, 4453 + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, 4451 4454 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, 4452 4455 [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, 4453 4456 [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, ··· 4459 4462 [MLX5_SQ_STATE_NA] = IB_QPS_ERR, 4460 4463 }, 4461 4464 [MLX5_RQ_STATE_NA] = { 4462 - [MLX5_SQC_STATE_RST] = IB_QPS_RESET, 4465 + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, 4463 4466 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, 4464 4467 [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, 4465 4468 [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, ··· 4705 4708 return err; 4706 4709 } 4707 4710 4708 - struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, 4709 - struct ib_udata *udata) 4711 + int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) 4710 4712 { 4711 - struct mlx5_ib_dev *dev 
= to_mdev(ibdev); 4712 - struct mlx5_ib_xrcd *xrcd; 4713 - int err; 4713 + struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device); 4714 + struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd); 4714 4715 4715 4716 if (!MLX5_CAP_GEN(dev->mdev, xrc)) 4716 - return ERR_PTR(-ENOSYS); 4717 + return -EOPNOTSUPP; 4717 4718 4718 - xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); 4719 - if (!xrcd) 4720 - return ERR_PTR(-ENOMEM); 4721 - 4722 - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); 4723 - if (err) { 4724 - kfree(xrcd); 4725 - return ERR_PTR(-ENOMEM); 4726 - } 4727 - 4728 - return &xrcd->ibxrcd; 4719 + return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); 4729 4720 } 4730 4721 4731 - int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 4722 + void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 4732 4723 { 4733 4724 struct mlx5_ib_dev *dev = to_mdev(xrcd->device); 4734 4725 u32 xrcdn = to_mxrcd(xrcd)->xrcdn; 4735 - int err; 4736 4726 4737 - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); 4738 - if (err) 4739 - mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); 4740 - 4741 - kfree(xrcd); 4742 - return 0; 4727 + mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); 4743 4728 } 4744 4729 4745 4730 static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
+1
drivers/infiniband/hw/mlx5/qp.h
··· 43 43 44 44 int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); 45 45 int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); 46 + int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); 46 47 #endif /* _MLX5_IB_QP_H */
+105 -16
drivers/infiniband/hw/mlx5/restrack.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 2 /* 3 - * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. 3 + * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved. 4 4 */ 5 5 6 6 #include <uapi/rdma/rdma_netlink.h> 7 + #include <linux/mlx5/rsc_dump.h> 7 8 #include <rdma/ib_umem_odp.h> 8 9 #include <rdma/restrack.h> 9 10 #include "mlx5_ib.h" 11 + #include "restrack.h" 10 12 11 - static int fill_stat_mr_entry(struct sk_buff *msg, 12 - struct rdma_restrack_entry *res) 13 + #define MAX_DUMP_SIZE 1024 14 + 15 + static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type, 16 + int index, void *data, int *data_len) 13 17 { 14 - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); 18 + struct mlx5_core_dev *mdev = dev; 19 + struct mlx5_rsc_dump_cmd *cmd; 20 + struct mlx5_rsc_key key = {}; 21 + struct page *page; 22 + int offset = 0; 23 + int err = 0; 24 + int cmd_err; 25 + int size; 26 + 27 + page = alloc_page(GFP_KERNEL); 28 + if (!page) 29 + return -ENOMEM; 30 + 31 + key.size = PAGE_SIZE; 32 + key.rsc = type; 33 + key.index1 = index; 34 + key.num_of_obj1 = 1; 35 + 36 + cmd = mlx5_rsc_dump_cmd_create(mdev, &key); 37 + if (IS_ERR(cmd)) { 38 + err = PTR_ERR(cmd); 39 + goto free_page; 40 + } 41 + 42 + do { 43 + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); 44 + if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) { 45 + err = cmd_err; 46 + goto destroy_cmd; 47 + } 48 + memcpy(data + offset, page_address(page), size); 49 + offset += size; 50 + } while (cmd_err > 0); 51 + *data_len = offset; 52 + 53 + destroy_cmd: 54 + mlx5_rsc_dump_cmd_destroy(cmd); 55 + free_page: 56 + __free_page(page); 57 + return err; 58 + } 59 + 60 + static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev, 61 + enum mlx5_sgmt_type type, u32 key) 62 + { 63 + int len = 0; 64 + void *data; 65 + int err; 66 + 67 + data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL); 68 + if (!data) 69 + return -ENOMEM; 70 + 71 + err = 
dump_rsc(dev->mdev, type, key, data, &len); 72 + if (err) 73 + goto out; 74 + 75 + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 76 + out: 77 + kfree(data); 78 + return err; 79 + } 80 + 81 + static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) 82 + { 15 83 struct mlx5_ib_mr *mr = to_mmr(ibmr); 16 84 struct nlattr *table_attr; 17 85 ··· 99 31 msg, "page_invalidations", 100 32 atomic64_read(&mr->odp_stats.invalidations))) 101 33 goto err_table; 34 + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", 35 + atomic64_read(&mr->odp_stats.prefetch))) 36 + goto err_table; 102 37 103 38 nla_nest_end(msg, table_attr); 104 39 return 0; ··· 112 41 return -EMSGSIZE; 113 42 } 114 43 115 - static int fill_res_mr_entry(struct sk_buff *msg, 116 - struct rdma_restrack_entry *res) 44 + static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) 117 45 { 118 - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); 46 + struct mlx5_ib_mr *mr = to_mmr(ibmr); 47 + 48 + return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, 49 + mlx5_mkey_to_idx(mr->mmkey.key)); 50 + } 51 + 52 + static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) 53 + { 119 54 struct mlx5_ib_mr *mr = to_mmr(ibmr); 120 55 struct nlattr *table_attr; 121 56 ··· 148 71 return -EMSGSIZE; 149 72 } 150 73 151 - int mlx5_ib_fill_res_entry(struct sk_buff *msg, 152 - struct rdma_restrack_entry *res) 74 + static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) 153 75 { 154 - if (res->type == RDMA_RESTRACK_MR) 155 - return fill_res_mr_entry(msg, res); 76 + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); 77 + struct mlx5_ib_cq *cq = to_mcq(ibcq); 156 78 157 - return 0; 79 + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); 158 80 } 159 81 160 - int mlx5_ib_fill_stat_entry(struct sk_buff *msg, 161 - struct rdma_restrack_entry *res) 82 + static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) 162 83 { 
163 - if (res->type == RDMA_RESTRACK_MR) 164 - return fill_stat_mr_entry(msg, res); 84 + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 165 85 86 + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP, 87 + ibqp->qp_num); 88 + } 89 + 90 + static const struct ib_device_ops restrack_ops = { 91 + .fill_res_cq_entry_raw = fill_res_cq_entry_raw, 92 + .fill_res_mr_entry = fill_res_mr_entry, 93 + .fill_res_mr_entry_raw = fill_res_mr_entry_raw, 94 + .fill_res_qp_entry_raw = fill_res_qp_entry_raw, 95 + .fill_stat_mr_entry = fill_stat_mr_entry, 96 + }; 97 + 98 + int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev) 99 + { 100 + ib_set_device_ops(&dev->ib_dev, &restrack_ops); 166 101 return 0; 167 102 }
+13
drivers/infiniband/hw/mlx5/restrack.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* 3 + * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved. 4 + */ 5 + 6 + #ifndef _MLX5_IB_RESTRACK_H 7 + #define _MLX5_IB_RESTRACK_H 8 + 9 + #include "mlx5_ib.h" 10 + 11 + int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev); 12 + 13 + #endif /* _MLX5_IB_RESTRACK_H */
+2 -2
drivers/infiniband/hw/mlx5/srq.c
··· 274 274 if (srq->wq_sig) 275 275 in.flags |= MLX5_SRQ_FLAG_WQ_SIG; 276 276 277 - if (init_attr->srq_type == IB_SRQT_XRC) 277 + if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd) 278 278 in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; 279 279 else 280 - in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; 280 + in.xrcd = dev->devr.xrcdn0; 281 281 282 282 if (init_attr->srq_type == IB_SRQT_TM) { 283 283 in.tm_log_list_size =
+45
drivers/infiniband/hw/mlx5/std_types.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #include <rdma/uverbs_ioctl.h> 7 + #include <rdma/mlx5_user_ioctl_cmds.h> 8 + #include <rdma/mlx5_user_ioctl_verbs.h> 9 + #include <linux/mlx5/driver.h> 10 + #include "mlx5_ib.h" 11 + 12 + #define UVERBS_MODULE_NAME mlx5_ib 13 + #include <rdma/uverbs_named_ioctl.h> 14 + 15 + static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)( 16 + struct uverbs_attr_bundle *attrs) 17 + { 18 + struct ib_pd *pd = 19 + uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE); 20 + struct mlx5_ib_pd *mpd = to_mpd(pd); 21 + 22 + return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN, 23 + &mpd->pdn, sizeof(mpd->pdn)); 24 + } 25 + 26 + DECLARE_UVERBS_NAMED_METHOD( 27 + MLX5_IB_METHOD_PD_QUERY, 28 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE, 29 + UVERBS_OBJECT_PD, 30 + UVERBS_ACCESS_READ, 31 + UA_MANDATORY), 32 + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN, 33 + UVERBS_ATTR_TYPE(u32), 34 + UA_MANDATORY)); 35 + 36 + ADD_UVERBS_METHODS(mlx5_ib_pd, 37 + UVERBS_OBJECT_PD, 38 + &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY)); 39 + 40 + const struct uapi_definition mlx5_ib_std_types_defs[] = { 41 + UAPI_DEF_CHAIN_OBJ_TREE( 42 + UVERBS_OBJECT_PD, 43 + &mlx5_ib_pd), 44 + {}, 45 + };
+50 -18
drivers/infiniband/hw/mlx5/wr.c
··· 263 263 return cpu_to_be64(result); 264 264 } 265 265 266 - static __be64 get_umr_update_access_mask(int atomic) 266 + static __be64 get_umr_update_access_mask(int atomic, 267 + int relaxed_ordering_write, 268 + int relaxed_ordering_read) 267 269 { 268 270 u64 result; 269 271 ··· 276 274 277 275 if (atomic) 278 276 result |= MLX5_MKEY_MASK_A; 277 + 278 + if (relaxed_ordering_write) 279 + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; 280 + 281 + if (relaxed_ordering_read) 282 + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; 279 283 280 284 return cpu_to_be64(result); 281 285 } ··· 297 289 298 290 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) 299 291 { 300 - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && 301 - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || 302 - (mask & MLX5_MKEY_MASK_A && 303 - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) 292 + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && 293 + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 304 294 return -EPERM; 295 + 296 + if (mask & MLX5_MKEY_MASK_A && 297 + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 298 + return -EPERM; 299 + 300 + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && 301 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 302 + return -EPERM; 303 + 304 + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && 305 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 306 + return -EPERM; 307 + 305 308 return 0; 306 309 } 307 310 308 311 static int set_reg_umr_segment(struct mlx5_ib_dev *dev, 309 312 struct mlx5_wqe_umr_ctrl_seg *umr, 310 - const struct ib_send_wr *wr, int atomic) 313 + const struct ib_send_wr *wr) 311 314 { 312 315 const struct mlx5_umr_wr *umrwr = umr_wr(wr); 313 316 ··· 344 325 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) 345 326 umr->mkey_mask |= get_umr_update_translation_mask(); 346 327 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { 347 - umr->mkey_mask |= get_umr_update_access_mask(atomic); 
328 + umr->mkey_mask |= get_umr_update_access_mask( 329 + !!(MLX5_CAP_GEN(dev->mdev, atomic)), 330 + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), 331 + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); 348 332 umr->mkey_mask |= get_umr_update_pd_mask(); 349 333 } 350 334 if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) ··· 405 383 406 384 memset(seg, 0, sizeof(*seg)); 407 385 if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 408 - seg->status = MLX5_MKEY_STATUS_FREE; 386 + MLX5_SET(mkc, seg, free, 1); 409 387 410 - seg->flags = convert_access(umrwr->access_flags); 388 + MLX5_SET(mkc, seg, a, 389 + !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); 390 + MLX5_SET(mkc, seg, rw, 391 + !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); 392 + MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); 393 + MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); 394 + MLX5_SET(mkc, seg, lr, 1); 395 + MLX5_SET(mkc, seg, relaxed_ordering_write, 396 + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 397 + MLX5_SET(mkc, seg, relaxed_ordering_read, 398 + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 399 + 411 400 if (umrwr->pd) 412 - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); 401 + MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); 413 402 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && 414 403 !umrwr->length) 415 - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); 404 + MLX5_SET(mkc, seg, length64, 1); 416 405 417 - seg->start_addr = cpu_to_be64(umrwr->virt_addr); 418 - seg->len = cpu_to_be64(umrwr->length); 419 - seg->log2_page_size = umrwr->page_shift; 420 - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | 421 - mlx5_mkey_variant(umrwr->mkey)); 406 + MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); 407 + MLX5_SET64(mkc, seg, len, umrwr->length); 408 + MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); 409 + MLX5_SET(mkc, seg, qpn, 0xffffff); 410 + MLX5_SET(mkc, seg, mkey_7_0, 
mlx5_mkey_variant(umrwr->mkey)); 422 411 } 423 412 424 413 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, ··· 1257 1224 1258 1225 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 1259 1226 (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); 1260 - err = set_reg_umr_segment(dev, *seg, wr, 1261 - !!(MLX5_CAP_GEN(dev->mdev, atomic))); 1227 + err = set_reg_umr_segment(dev, *seg, wr); 1262 1228 if (unlikely(err)) 1263 1229 goto out; 1264 1230 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 2901 2901 } 2902 2902 2903 2903 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, 2904 - u32 max_num_sg, struct ib_udata *udata) 2904 + u32 max_num_sg) 2905 2905 { 2906 2906 int status; 2907 2907 struct ocrdma_mr *mr;
+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
··· 101 101 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, 102 102 u64 virt, int acc, struct ib_udata *); 103 103 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 104 - u32 max_num_sg, struct ib_udata *udata); 104 + u32 max_num_sg); 105 105 int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 106 106 unsigned int *sg_offset); 107 107
+1 -2
drivers/infiniband/hw/qedr/main.c
··· 110 110 if (err) 111 111 return err; 112 112 113 - immutable->pkey_tbl_len = 1; 114 113 immutable->gid_tbl_len = 1; 115 114 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; 116 115 immutable->max_mad_size = 0; ··· 178 179 179 180 static const struct ib_device_ops qedr_roce_dev_ops = { 180 181 .get_port_immutable = qedr_roce_port_immutable, 182 + .query_pkey = qedr_query_pkey, 181 183 }; 182 184 183 185 static void qedr_roce_register_device(struct qedr_dev *dev) ··· 221 221 .post_srq_recv = qedr_post_srq_recv, 222 222 .process_mad = qedr_process_mad, 223 223 .query_device = qedr_query_device, 224 - .query_pkey = qedr_query_pkey, 225 224 .query_port = qedr_query_port, 226 225 .query_qp = qedr_query_qp, 227 226 .query_srq = qedr_query_srq,
+3 -2
drivers/infiniband/hw/qedr/qedr.h
··· 235 235 u32 dpi_size; 236 236 u16 dpi; 237 237 bool db_rec; 238 + u8 edpm_mode; 238 239 }; 239 240 240 241 union db_prod32 { ··· 345 344 u32 wqe_prod; 346 345 u32 sge_prod; 347 346 u32 wr_prod_cnt; 348 - u32 wr_cons_cnt; 347 + atomic_t wr_cons_cnt; 349 348 u32 num_elems; 350 349 351 - u32 *virt_prod_pair_addr; 350 + struct rdma_srq_producers *virt_prod_pair_addr; 352 351 dma_addr_t phy_prod_pair_addr; 353 352 }; 354 353
+25 -20
drivers/infiniband/hw/qedr/verbs.c
··· 239 239 attr->ip_gids = true; 240 240 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 241 241 attr->gid_tbl_len = 1; 242 - attr->pkey_tbl_len = 1; 243 242 } else { 244 243 attr->gid_tbl_len = QEDR_MAX_SGID; 245 244 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; ··· 274 275 DP_ERR(dev, "Problem copying data from user space\n"); 275 276 return -EFAULT; 276 277 } 277 - 278 + ctx->edpm_mode = !!(ureq.context_flags & 279 + QEDR_ALLOC_UCTX_EDPM_MODE); 278 280 ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); 279 281 } 280 282 ··· 316 316 uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; 317 317 else 318 318 uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | 319 - QEDR_DPM_TYPE_ROCE_LEGACY; 319 + QEDR_DPM_TYPE_ROCE_LEGACY | 320 + QEDR_DPM_TYPE_ROCE_EDPM_MODE; 320 321 321 - uresp.dpm_flags |= QEDR_DPM_SIZES_SET; 322 - uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; 323 - uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; 322 + if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) { 323 + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; 324 + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; 325 + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; 326 + uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE; 327 + } 324 328 325 329 uresp.wids_enabled = 1; 326 330 uresp.wid_count = oparams.wid_count; ··· 1758 1754 struct qed_rdma_create_qp_out_params out_params; 1759 1755 struct qedr_pd *pd = get_qedr_pd(ibpd); 1760 1756 struct qedr_create_qp_uresp uresp; 1761 - struct qedr_ucontext *ctx = NULL; 1757 + struct qedr_ucontext *ctx = pd ? 
pd->uctx : NULL; 1762 1758 struct qedr_create_qp_ureq ureq; 1763 1759 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); 1764 1760 int rc = -EINVAL; ··· 1795 1791 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; 1796 1792 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 1797 1793 } 1794 + 1795 + if (ctx) 1796 + SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode); 1798 1797 1799 1798 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1800 1799 &in_params, &out_params); ··· 3011 3004 } 3012 3005 3013 3006 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, 3014 - u32 max_num_sg, struct ib_udata *udata) 3007 + u32 max_num_sg) 3015 3008 { 3016 3009 struct qedr_mr *mr; 3017 3010 ··· 3694 3687 * count and consumer count and subtract it from max 3695 3688 * work request supported so that we get elements left. 3696 3689 */ 3697 - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; 3690 + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); 3698 3691 3699 3692 return hw_srq->max_wr - used; 3700 3693 } ··· 3709 3702 unsigned long flags; 3710 3703 int status = 0; 3711 3704 u32 num_sge; 3712 - u32 offset; 3713 3705 3714 3706 spin_lock_irqsave(&srq->lock, flags); 3715 3707 ··· 3721 3715 if (!qedr_srq_elem_left(hw_srq) || 3722 3716 wr->num_sge > srq->hw_srq.max_sges) { 3723 3717 DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", 3724 - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, 3718 + hw_srq->wr_prod_cnt, 3719 + atomic_read(&hw_srq->wr_cons_cnt), 3725 3720 wr->num_sge, srq->hw_srq.max_sges); 3726 3721 status = -ENOMEM; 3727 3722 *bad_wr = wr; ··· 3756 3749 hw_srq->sge_prod++; 3757 3750 } 3758 3751 3759 - /* Flush WQE and SGE information before 3752 + /* Update WQE and SGE information before 3760 3753 * updating producer. 3761 3754 */ 3762 - wmb(); 3755 + dma_wmb(); 3763 3756 3764 3757 /* SRQ producer is 8 bytes. 
Need to update SGE producer index 3765 3758 * in first 4 bytes and need to update WQE producer in 3766 3759 * next 4 bytes. 3767 3760 */ 3768 - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; 3769 - offset = offsetof(struct rdma_srq_producers, wqe_prod); 3770 - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = 3771 - hw_srq->wqe_prod; 3761 + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; 3762 + /* Make sure sge producer is updated first */ 3763 + dma_wmb(); 3764 + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; 3772 3765 3773 - /* Flush producer after updating it. */ 3774 - wmb(); 3775 3766 wr = wr->next; 3776 3767 } 3777 3768 ··· 4188 4183 } else { 4189 4184 __process_resp_one(dev, qp, cq, wc, resp, wr_id); 4190 4185 } 4191 - srq->hw_srq.wr_cons_cnt++; 4186 + atomic_inc(&srq->hw_srq.wr_cons_cnt); 4192 4187 4193 4188 return 1; 4194 4189 }
+1 -1
drivers/infiniband/hw/qedr/verbs.h
··· 84 84 int sg_nents, unsigned int *sg_offset); 85 85 86 86 struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 87 - u32 max_num_sg, struct ib_udata *udata); 87 + u32 max_num_sg); 88 88 int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); 89 89 int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, 90 90 const struct ib_send_wr **bad_wr);
+2 -2
drivers/infiniband/hw/usnic/usnic_fwd.c
··· 214 214 if (!flow) 215 215 return ERR_PTR(-ENOMEM); 216 216 217 - tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); 217 + tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC); 218 218 if (!tlv) { 219 219 usnic_err("Failed to allocate memory\n"); 220 220 status = -ENOMEM; ··· 258 258 259 259 out_free_tlv: 260 260 spin_unlock(&ufdev->lock); 261 - pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); 261 + dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa); 262 262 if (!status) 263 263 return flow; 264 264 out_free_flow:
+1 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
··· 202 202 * @return: ib_mr pointer on success, otherwise returns an errno. 203 203 */ 204 204 struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 205 - u32 max_num_sg, struct ib_udata *udata) 205 + u32 max_num_sg) 206 206 { 207 207 struct pvrdma_dev *dev = to_vdev(pd->device); 208 208 struct pvrdma_user_mr *mr;
+1 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
··· 406 406 struct ib_udata *udata); 407 407 int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); 408 408 struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 409 - u32 max_num_sg, struct ib_udata *udata); 409 + u32 max_num_sg); 410 410 int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 411 411 int sg_nents, unsigned int *sg_offset); 412 412 int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+1 -2
drivers/infiniband/sw/rdmavt/ah.c
··· 90 90 /** 91 91 * rvt_create_ah - create an address handle 92 92 * @ibah: the IB address handle 93 - * @ah_attr: the attributes of the AH 94 - * @create_flags: create address handle flags (see enum rdma_create_ah_flags) 93 + * @init_attr: the attributes of the AH 95 94 * @udata: pointer to user's input output buffer information. 96 95 * 97 96 * This may be called from interrupt context.
+1 -1
drivers/infiniband/sw/rdmavt/mr.c
··· 576 576 * Return: the memory region on success, otherwise return an errno. 577 577 */ 578 578 struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 579 - u32 max_num_sg, struct ib_udata *udata) 579 + u32 max_num_sg) 580 580 { 581 581 struct rvt_mr *mr; 582 582
+1 -1
drivers/infiniband/sw/rdmavt/mr.h
··· 71 71 struct ib_udata *udata); 72 72 int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 73 73 struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 74 - u32 max_num_sg, struct ib_udata *udata); 74 + u32 max_num_sg); 75 75 int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 76 76 int sg_nents, unsigned int *sg_offset); 77 77
+4 -37
drivers/infiniband/sw/rxe/rxe.c
··· 40 40 MODULE_DESCRIPTION("Soft RDMA transport"); 41 41 MODULE_LICENSE("Dual BSD/GPL"); 42 42 43 - /* free resources for all ports on a device */ 44 - static void rxe_cleanup_ports(struct rxe_dev *rxe) 45 - { 46 - kfree(rxe->port.pkey_tbl); 47 - rxe->port.pkey_tbl = NULL; 48 - 49 - } 50 - 51 43 /* free resources for a rxe device all objects created for this device must 52 44 * have been destroyed 53 45 */ ··· 57 65 rxe_pool_cleanup(&rxe->mw_pool); 58 66 rxe_pool_cleanup(&rxe->mc_grp_pool); 59 67 rxe_pool_cleanup(&rxe->mc_elem_pool); 60 - 61 - rxe_cleanup_ports(rxe); 62 68 63 69 if (rxe->tfm) 64 70 crypto_free_shash(rxe->tfm); ··· 101 111 } 102 112 103 113 /* initialize port attributes */ 104 - static int rxe_init_port_param(struct rxe_port *port) 114 + static void rxe_init_port_param(struct rxe_port *port) 105 115 { 106 116 port->attr.state = IB_PORT_DOWN; 107 117 port->attr.max_mtu = IB_MTU_4096; ··· 124 134 port->attr.phys_state = RXE_PORT_PHYS_STATE; 125 135 port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); 126 136 port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); 127 - 128 - return 0; 129 137 } 130 138 131 139 /* initialize port state, note IB convention that HCA ports are always 132 140 * numbered from 1 133 141 */ 134 - static int rxe_init_ports(struct rxe_dev *rxe) 142 + static void rxe_init_ports(struct rxe_dev *rxe) 135 143 { 136 144 struct rxe_port *port = &rxe->port; 137 145 138 146 rxe_init_port_param(port); 139 - 140 - if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) 141 - return -EINVAL; 142 - 143 - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, 144 - sizeof(*port->pkey_tbl), GFP_KERNEL); 145 - 146 - if (!port->pkey_tbl) 147 - return -ENOMEM; 148 - 149 - port->pkey_tbl[0] = 0xffff; 150 147 addrconf_addr_eui48((unsigned char *)&port->port_guid, 151 148 rxe->ndev->dev_addr); 152 - 153 149 spin_lock_init(&port->port_lock); 154 - 155 - return 0; 156 150 } 157 151 158 152 /* init pools of managed objects */ ··· 226 252 /* init default 
device parameters */ 227 253 rxe_init_device_param(rxe); 228 254 229 - err = rxe_init_ports(rxe); 230 - if (err) 231 - goto err1; 255 + rxe_init_ports(rxe); 232 256 233 257 err = rxe_init_pools(rxe); 234 258 if (err) 235 - goto err2; 259 + return err; 236 260 237 261 /* init pending mmap list */ 238 262 spin_lock_init(&rxe->mmap_offset_lock); ··· 240 268 mutex_init(&rxe->usdev_lock); 241 269 242 270 return 0; 243 - 244 - err2: 245 - rxe_cleanup_ports(rxe); 246 - err1: 247 - return err; 248 271 } 249 272 250 273 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
+2 -6
drivers/infiniband/sw/rxe/rxe_loc.h
··· 103 103 from_mem_obj, 104 104 }; 105 105 106 - int rxe_mem_init_dma(struct rxe_pd *pd, 107 - int access, struct rxe_mem *mem); 106 + void rxe_mem_init_dma(struct rxe_pd *pd, 107 + int access, struct rxe_mem *mem); 108 108 109 109 int rxe_mem_init_user(struct rxe_pd *pd, u64 start, 110 110 u64 length, u64 iova, int access, struct ib_udata *udata, ··· 132 132 133 133 int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); 134 134 135 - int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 136 - u64 *page, int num_pages, u64 iova); 137 - 138 135 void rxe_mem_cleanup(struct rxe_pool_entry *arg); 139 136 140 137 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); ··· 142 145 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, 143 146 int paylen, struct rxe_pkt_info *pkt); 144 147 int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); 145 - enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); 146 148 const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); 147 149 struct device *rxe_dma_device(struct rxe_dev *rxe); 148 150 int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
+2 -48
drivers/infiniband/sw/rxe/rxe_mr.c
··· 144 144 return -ENOMEM; 145 145 } 146 146 147 - int rxe_mem_init_dma(struct rxe_pd *pd, 148 - int access, struct rxe_mem *mem) 147 + void rxe_mem_init_dma(struct rxe_pd *pd, 148 + int access, struct rxe_mem *mem) 149 149 { 150 150 rxe_mem_init(access, mem); 151 151 ··· 153 153 mem->access = access; 154 154 mem->state = RXE_MEM_STATE_VALID; 155 155 mem->type = RXE_MEM_TYPE_DMA; 156 - 157 - return 0; 158 156 } 159 157 160 158 int rxe_mem_init_user(struct rxe_pd *pd, u64 start, ··· 584 586 } 585 587 586 588 return mem; 587 - } 588 - 589 - int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 590 - u64 *page, int num_pages, u64 iova) 591 - { 592 - int i; 593 - int num_buf; 594 - int err; 595 - struct rxe_map **map; 596 - struct rxe_phys_buf *buf; 597 - int page_size; 598 - 599 - if (num_pages > mem->max_buf) { 600 - err = -EINVAL; 601 - goto err1; 602 - } 603 - 604 - num_buf = 0; 605 - page_size = 1 << mem->page_shift; 606 - map = mem->map; 607 - buf = map[0]->buf; 608 - 609 - for (i = 0; i < num_pages; i++) { 610 - buf->addr = *page++; 611 - buf->size = page_size; 612 - buf++; 613 - num_buf++; 614 - 615 - if (num_buf == RXE_BUF_PER_MAP) { 616 - map++; 617 - buf = map[0]->buf; 618 - num_buf = 0; 619 - } 620 - } 621 - 622 - mem->iova = iova; 623 - mem->va = iova; 624 - mem->length = num_pages << mem->page_shift; 625 - mem->state = RXE_MEM_STATE_VALID; 626 - 627 - return 0; 628 - 629 - err1: 630 - return err; 631 589 }
-5
drivers/infiniband/sw/rxe/rxe_net.c
··· 520 520 return rxe->ndev->name; 521 521 } 522 522 523 - enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) 524 - { 525 - return IB_LINK_LAYER_ETHERNET; 526 - } 527 - 528 523 int rxe_net_add(const char *ibdev_name, struct net_device *ndev) 529 524 { 530 525 int err;
+2 -2
drivers/infiniband/sw/rxe/rxe_param.h
··· 100 100 RXE_MAX_SRQ_SGE = 27, 101 101 RXE_MIN_SRQ_SGE = 1, 102 102 RXE_MAX_FMR_PAGE_LIST_LEN = 512, 103 - RXE_MAX_PKEYS = 64, 103 + RXE_MAX_PKEYS = 1, 104 104 RXE_LOCAL_CA_ACK_DELAY = 15, 105 105 106 106 RXE_MAX_UCONTEXT = 512, ··· 148 148 RXE_PORT_INIT_TYPE_REPLY = 0, 149 149 RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, 150 150 RXE_PORT_ACTIVE_SPEED = 1, 151 - RXE_PORT_PKEY_TBL_LEN = 64, 151 + RXE_PORT_PKEY_TBL_LEN = 1, 152 152 RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, 153 153 RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, 154 154 };
+9 -26
drivers/infiniband/sw/rxe/rxe_recv.c
··· 101 101 static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 102 102 u32 qpn, struct rxe_qp *qp) 103 103 { 104 - int i; 105 - int found_pkey = 0; 106 104 struct rxe_port *port = &rxe->port; 107 105 u16 pkey = bth_pkey(pkt); 108 106 109 107 pkt->pkey_index = 0; 110 108 111 - if (qpn == 1) { 112 - for (i = 0; i < port->attr.pkey_tbl_len; i++) { 113 - if (pkey_match(pkey, port->pkey_tbl[i])) { 114 - pkt->pkey_index = i; 115 - found_pkey = 1; 116 - break; 117 - } 118 - } 119 - 120 - if (!found_pkey) { 121 - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); 122 - set_bad_pkey_cntr(port); 123 - goto err1; 124 - } 125 - } else { 126 - if (unlikely(!pkey_match(pkey, 127 - port->pkey_tbl[qp->attr.pkey_index] 128 - ))) { 129 - pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); 130 - set_bad_pkey_cntr(port); 131 - goto err1; 132 - } 133 - pkt->pkey_index = qp->attr.pkey_index; 109 + if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { 110 + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); 111 + set_bad_pkey_cntr(port); 112 + goto err1; 134 113 } 135 114 136 115 if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && ··· 309 330 310 331 static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) 311 332 { 333 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 312 334 const struct ib_gid_attr *gid_attr; 313 335 union ib_gid dgid; 314 336 union ib_gid *pdgid; 337 + 338 + if (pkt->mask & RXE_LOOPBACK_MASK) 339 + return 0; 315 340 316 341 if (skb->protocol == htons(ETH_P_IP)) { 317 342 ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, ··· 349 366 if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) 350 367 goto drop; 351 368 352 - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { 369 + if (rxe_match_dgid(rxe, skb) < 0) { 353 370 pr_warn_ratelimited("failed matching dgid\n"); 354 371 goto drop; 355 372 }
+1 -4
drivers/infiniband/sw/rxe/rxe_req.c
··· 381 381 struct rxe_pkt_info *pkt) 382 382 { 383 383 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 384 - struct rxe_port *port = &rxe->port; 385 384 struct sk_buff *skb; 386 385 struct rxe_send_wr *ibwr = &wqe->wr; 387 386 struct rxe_av *av; ··· 418 419 (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == 419 420 (RXE_WRITE_MASK | RXE_IMMDT_MASK)); 420 421 421 - pkey = (qp_type(qp) == IB_QPT_GSI) ? 422 - port->pkey_tbl[ibwr->wr.ud.pkey_index] : 423 - port->pkey_tbl[qp->attr.pkey_index]; 422 + pkey = IB_DEFAULT_PKEY_FULL; 424 423 425 424 qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : 426 425 qp->attr.dest_qp_num;
+12 -36
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 83 83 static int rxe_query_pkey(struct ib_device *device, 84 84 u8 port_num, u16 index, u16 *pkey) 85 85 { 86 - struct rxe_dev *rxe = to_rdev(device); 87 - struct rxe_port *port; 86 + if (index > 0) 87 + return -EINVAL; 88 88 89 - port = &rxe->port; 90 - 91 - if (unlikely(index >= port->attr.pkey_tbl_len)) { 92 - dev_warn(device->dev.parent, "invalid index = %d\n", 93 - index); 94 - goto err1; 95 - } 96 - 97 - *pkey = port->pkey_tbl[index]; 89 + *pkey = IB_DEFAULT_PKEY_FULL; 98 90 return 0; 99 - 100 - err1: 101 - return -EINVAL; 102 91 } 103 92 104 93 static int rxe_modify_device(struct ib_device *dev, ··· 130 141 static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, 131 142 u8 port_num) 132 143 { 133 - struct rxe_dev *rxe = to_rdev(dev); 134 - 135 - return rxe_link_layer(rxe, port_num); 144 + return IB_LINK_LAYER_ETHERNET; 136 145 } 137 146 138 147 static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) ··· 671 684 unsigned int mask; 672 685 unsigned int length = 0; 673 686 int i; 687 + struct ib_send_wr *next; 674 688 675 689 while (wr) { 676 690 mask = wr_opcode_mask(wr->opcode, qp); ··· 688 700 break; 689 701 } 690 702 703 + next = wr->next; 704 + 691 705 length = 0; 692 706 for (i = 0; i < wr->num_sge; i++) 693 707 length += wr->sg_list[i].length; ··· 700 710 *bad_wr = wr; 701 711 break; 702 712 } 703 - wr = wr->next; 713 + wr = next; 704 714 } 705 715 706 716 rxe_run_task(&qp->req.task, 1); ··· 891 901 struct rxe_dev *rxe = to_rdev(ibpd->device); 892 902 struct rxe_pd *pd = to_rpd(ibpd); 893 903 struct rxe_mem *mr; 894 - int err; 895 904 896 905 mr = rxe_alloc(&rxe->mr_pool); 897 - if (!mr) { 898 - err = -ENOMEM; 899 - goto err1; 900 - } 906 + if (!mr) 907 + return ERR_PTR(-ENOMEM); 901 908 902 909 rxe_add_index(mr); 903 - 904 910 rxe_add_ref(pd); 905 - 906 - err = rxe_mem_init_dma(pd, access, mr); 907 - if (err) 908 - goto err2; 911 + rxe_mem_init_dma(pd, access, mr); 909 912 910 913 return &mr->ibmr; 911 - 912 - 
err2: 913 - rxe_drop_ref(pd); 914 - rxe_drop_index(mr); 915 - rxe_drop_ref(mr); 916 - err1: 917 - return ERR_PTR(err); 918 914 } 919 915 920 916 static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, ··· 951 975 } 952 976 953 977 static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, 954 - u32 max_num_sg, struct ib_udata *udata) 978 + u32 max_num_sg) 955 979 { 956 980 struct rxe_dev *rxe = to_rdev(ibpd->device); 957 981 struct rxe_pd *pd = to_rpd(ibpd);
-1
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 371 371 372 372 struct rxe_port { 373 373 struct ib_port_attr attr; 374 - u16 *pkey_tbl; 375 374 __be64 port_guid; 376 375 __be64 subnet_prefix; 377 376 spinlock_t port_lock; /* guard port */
-1
drivers/infiniband/sw/siw/siw_main.c
··· 289 289 .post_srq_recv = siw_post_srq_recv, 290 290 .query_device = siw_query_device, 291 291 .query_gid = siw_query_gid, 292 - .query_pkey = siw_query_pkey, 293 292 .query_port = siw_query_port, 294 293 .query_qp = siw_query_qp, 295 294 .query_srq = siw_query_srq,
+1 -10
drivers/infiniband/sw/siw/siw_verbs.c
··· 176 176 attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); 177 177 attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 178 178 IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; 179 - attr->pkey_tbl_len = 1; 180 179 attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; 181 180 attr->state = sdev->state; 182 181 /* ··· 203 204 if (rv) 204 205 return rv; 205 206 206 - port_immutable->pkey_tbl_len = attr.pkey_tbl_len; 207 207 port_immutable->gid_tbl_len = attr.gid_tbl_len; 208 208 port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; 209 209 210 - return 0; 211 - } 212 - 213 - int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey) 214 - { 215 - /* Report the default pkey */ 216 - *pkey = 0xffff; 217 210 return 0; 218 211 } 219 212 ··· 1364 1373 } 1365 1374 1366 1375 struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1367 - u32 max_sge, struct ib_udata *udata) 1376 + u32 max_sge) 1368 1377 { 1369 1378 struct siw_device *sdev = to_siw_dev(pd->device); 1370 1379 struct siw_mr *mr = NULL;
+1 -2
drivers/infiniband/sw/siw/siw_verbs.h
··· 46 46 struct ib_udata *udata); 47 47 int siw_query_port(struct ib_device *base_dev, u8 port, 48 48 struct ib_port_attr *attr); 49 - int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey); 50 49 int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, 51 50 union ib_gid *gid); 52 51 int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); ··· 68 69 struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, 69 70 u64 rnic_va, int rights, struct ib_udata *udata); 70 71 struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type, 71 - u32 max_sge, struct ib_udata *udata); 72 + u32 max_sge); 72 73 struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights); 73 74 int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, 74 75 unsigned int *sg_off);
+2 -2
drivers/infiniband/ulp/ipoib/ipoib.h
··· 515 515 516 516 int ipoib_ib_dev_open_default(struct net_device *dev); 517 517 int ipoib_ib_dev_open(struct net_device *dev); 518 - int ipoib_ib_dev_stop(struct net_device *dev); 518 + void ipoib_ib_dev_stop(struct net_device *dev); 519 519 void ipoib_ib_dev_up(struct net_device *dev); 520 520 void ipoib_ib_dev_down(struct net_device *dev); 521 521 int ipoib_ib_dev_stop_default(struct net_device *dev); ··· 527 527 528 528 void ipoib_mcast_restart_task(struct work_struct *work); 529 529 void ipoib_mcast_start_thread(struct net_device *dev); 530 - int ipoib_mcast_stop_thread(struct net_device *dev); 530 + void ipoib_mcast_stop_thread(struct net_device *dev); 531 531 532 532 void ipoib_mcast_dev_down(struct net_device *dev); 533 533 void ipoib_mcast_dev_flush(struct net_device *dev);
+29 -38
drivers/infiniband/ulp/ipoib/ipoib_ib.c
··· 670 670 return rc; 671 671 } 672 672 673 - static void __ipoib_reap_ah(struct net_device *dev) 673 + static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) 674 674 { 675 - struct ipoib_dev_priv *priv = ipoib_priv(dev); 676 675 struct ipoib_ah *ah, *tah; 677 676 unsigned long flags; 678 677 679 - netif_tx_lock_bh(dev); 678 + netif_tx_lock_bh(priv->dev); 680 679 spin_lock_irqsave(&priv->lock, flags); 681 680 682 681 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) ··· 686 687 } 687 688 688 689 spin_unlock_irqrestore(&priv->lock, flags); 689 - netif_tx_unlock_bh(dev); 690 + netif_tx_unlock_bh(priv->dev); 690 691 } 691 692 692 693 void ipoib_reap_ah(struct work_struct *work) 693 694 { 694 695 struct ipoib_dev_priv *priv = 695 696 container_of(work, struct ipoib_dev_priv, ah_reap_task.work); 696 - struct net_device *dev = priv->dev; 697 697 698 - __ipoib_reap_ah(dev); 698 + ipoib_reap_dead_ahs(priv); 699 699 700 700 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) 701 701 queue_delayed_work(priv->wq, &priv->ah_reap_task, 702 702 round_jiffies_relative(HZ)); 703 703 } 704 704 705 - static void ipoib_flush_ah(struct net_device *dev) 705 + static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) 706 706 { 707 - struct ipoib_dev_priv *priv = ipoib_priv(dev); 708 - 709 - cancel_delayed_work(&priv->ah_reap_task); 710 - flush_workqueue(priv->wq); 711 - ipoib_reap_ah(&priv->ah_reap_task.work); 707 + clear_bit(IPOIB_STOP_REAPER, &priv->flags); 708 + queue_delayed_work(priv->wq, &priv->ah_reap_task, 709 + round_jiffies_relative(HZ)); 712 710 } 713 711 714 - static void ipoib_stop_ah(struct net_device *dev) 712 + static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) 715 713 { 716 - struct ipoib_dev_priv *priv = ipoib_priv(dev); 717 - 718 714 set_bit(IPOIB_STOP_REAPER, &priv->flags); 719 - ipoib_flush_ah(dev); 715 + cancel_delayed_work(&priv->ah_reap_task); 716 + /* 717 + * After ipoib_stop_ah_reaper() we always go through 718 + * 
ipoib_reap_dead_ahs() which ensures the work is really stopped and 719 + * does a final flush out of the dead_ah's list 720 + */ 720 721 } 721 722 722 723 static int recvs_pending(struct net_device *dev) ··· 845 846 return 0; 846 847 } 847 848 848 - int ipoib_ib_dev_stop(struct net_device *dev) 849 - { 850 - struct ipoib_dev_priv *priv = ipoib_priv(dev); 851 - 852 - priv->rn_ops->ndo_stop(dev); 853 - 854 - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 855 - ipoib_flush_ah(dev); 856 - 857 - return 0; 858 - } 859 - 860 849 int ipoib_ib_dev_open_default(struct net_device *dev) 861 850 { 862 851 struct ipoib_dev_priv *priv = ipoib_priv(dev); ··· 888 901 return -1; 889 902 } 890 903 891 - clear_bit(IPOIB_STOP_REAPER, &priv->flags); 892 - queue_delayed_work(priv->wq, &priv->ah_reap_task, 893 - round_jiffies_relative(HZ)); 894 - 904 + ipoib_start_ah_reaper(priv); 895 905 if (priv->rn_ops->ndo_open(dev)) { 896 906 pr_warn("%s: Failed to open dev\n", dev->name); 897 907 goto dev_stop; ··· 899 915 return 0; 900 916 901 917 dev_stop: 902 - set_bit(IPOIB_STOP_REAPER, &priv->flags); 903 - cancel_delayed_work(&priv->ah_reap_task); 904 - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 905 - ipoib_ib_dev_stop(dev); 918 + ipoib_stop_ah_reaper(priv); 906 919 return -1; 920 + } 921 + 922 + void ipoib_ib_dev_stop(struct net_device *dev) 923 + { 924 + struct ipoib_dev_priv *priv = ipoib_priv(dev); 925 + 926 + priv->rn_ops->ndo_stop(dev); 927 + 928 + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 929 + ipoib_stop_ah_reaper(priv); 907 930 } 908 931 909 932 void ipoib_pkey_dev_check_presence(struct net_device *dev) ··· 1223 1232 ipoib_mcast_dev_flush(dev); 1224 1233 if (oper_up) 1225 1234 set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 1226 - ipoib_flush_ah(dev); 1235 + ipoib_reap_dead_ahs(priv); 1227 1236 } 1228 1237 1229 1238 if (level >= IPOIB_FLUSH_NORMAL) ··· 1298 1307 * the neighbor garbage collection is stopped and reaped. 
1299 1308 * That should all be done now, so make a final ah flush. 1300 1309 */ 1301 - ipoib_stop_ah(dev); 1310 + ipoib_reap_dead_ahs(priv); 1302 1311 1303 1312 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 1304 1313
+11 -2
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 1892 1892 1893 1893 priv->max_ib_mtu = ppriv->max_ib_mtu; 1894 1894 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); 1895 - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); 1896 - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); 1895 + if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN)) 1896 + memcpy(&priv->local_gid, priv->dev->dev_addr + 4, 1897 + sizeof(priv->local_gid)); 1898 + else { 1899 + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, 1900 + INFINIBAND_ALEN); 1901 + memcpy(&priv->local_gid, &ppriv->local_gid, 1902 + sizeof(priv->local_gid)); 1903 + } 1897 1904 } 1898 1905 1899 1906 static int ipoib_ndo_init(struct net_device *ndev) ··· 1983 1976 1984 1977 /* no more works over the priv->wq */ 1985 1978 if (priv->wq) { 1979 + /* See ipoib_mcast_carrier_on_task() */ 1980 + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); 1986 1981 flush_workqueue(priv->wq); 1987 1982 destroy_workqueue(priv->wq); 1988 1983 priv->wq = NULL;
+1 -3
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
··· 680 680 spin_unlock_irqrestore(&priv->lock, flags); 681 681 } 682 682 683 - int ipoib_mcast_stop_thread(struct net_device *dev) 683 + void ipoib_mcast_stop_thread(struct net_device *dev) 684 684 { 685 685 struct ipoib_dev_priv *priv = ipoib_priv(dev); 686 686 687 687 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 688 688 689 689 cancel_delayed_work_sync(&priv->mcast_task); 690 - 691 - return 0; 692 690 } 693 691 694 692 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+6 -19
drivers/infiniband/ulp/iser/iscsi_iser.h
··· 300 300 struct iscsi_iser_task; 301 301 302 302 /** 303 - * struct iser_comp - iSER completion context 304 - * 305 - * @cq: completion queue 306 - * @active_qps: Number of active QPs attached 307 - * to completion context 308 - */ 309 - struct iser_comp { 310 - struct ib_cq *cq; 311 - int active_qps; 312 - }; 313 - 314 - /** 315 303 * struct iser_device - iSER device handle 316 304 * 317 305 * @ib_device: RDMA device ··· 308 320 * @event_handler: IB events handle routine 309 321 * @ig_list: entry in devices list 310 322 * @refcount: Reference counter, dominated by open iser connections 311 - * @comps_used: Number of completion contexts used, Min between online 312 - * cpus and device max completion vectors 313 - * @comps: Dinamically allocated array of completion handlers 314 323 */ 315 324 struct iser_device { 316 325 struct ib_device *ib_device; ··· 315 330 struct ib_event_handler event_handler; 316 331 struct list_head ig_list; 317 332 int refcount; 318 - int comps_used; 319 - struct iser_comp *comps; 320 333 }; 321 334 322 335 /** ··· 336 353 * @list: entry in connection fastreg pool 337 354 * @rsc: data buffer registration resources 338 355 * @sig_protected: is region protected indicator 356 + * @all_list: first and last list members 339 357 */ 340 358 struct iser_fr_desc { 341 359 struct list_head list; ··· 351 367 * @list: list of fastreg descriptors 352 368 * @lock: protects fastreg pool 353 369 * @size: size of the pool 370 + * @all_list: first and last list members 354 371 */ 355 372 struct iser_fr_pool { 356 373 struct list_head list; ··· 365 380 * 366 381 * @cma_id: rdma_cm connection maneger handle 367 382 * @qp: Connection Queue-pair 383 + * @cq: Connection completion queue 384 + * @cq_size: The number of max outstanding completions 368 385 * @post_recv_buf_count: post receive counter 369 386 * @sig_count: send work request signal count 370 387 * @rx_wr: receive work request for batch posts 371 388 * @device: reference to iser device 372 - * 
@comp: iser completion context 373 389 * @fr_pool: connection fast registration poool 374 390 * @pi_support: Indicate device T10-PI support 375 391 * @reg_cqe: completion handler ··· 378 392 struct ib_conn { 379 393 struct rdma_cm_id *cma_id; 380 394 struct ib_qp *qp; 395 + struct ib_cq *cq; 396 + u32 cq_size; 381 397 int post_recv_buf_count; 382 398 u8 sig_count; 383 399 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; 384 400 struct iser_device *device; 385 - struct iser_comp *comp; 386 401 struct iser_fr_pool fr_pool; 387 402 bool pi_support; 388 403 struct ib_cqe reg_cqe;
+25 -87
drivers/infiniband/ulp/iser/iser_verbs.c
··· 68 68 static int iser_create_device_ib_res(struct iser_device *device) 69 69 { 70 70 struct ib_device *ib_dev = device->ib_device; 71 - int i, max_cqe; 72 71 73 72 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { 74 73 iser_err("IB device does not support memory registrations\n"); 75 74 return -1; 76 75 } 77 76 78 - device->comps_used = min_t(int, num_online_cpus(), 79 - ib_dev->num_comp_vectors); 80 - 81 - device->comps = kcalloc(device->comps_used, sizeof(*device->comps), 82 - GFP_KERNEL); 83 - if (!device->comps) 84 - goto comps_err; 85 - 86 - max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); 87 - 88 - iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", 89 - device->comps_used, dev_name(&ib_dev->dev), 90 - ib_dev->num_comp_vectors, max_cqe); 91 - 92 77 device->pd = ib_alloc_pd(ib_dev, 93 78 iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); 94 79 if (IS_ERR(device->pd)) 95 80 goto pd_err; 96 - 97 - for (i = 0; i < device->comps_used; i++) { 98 - struct iser_comp *comp = &device->comps[i]; 99 - 100 - comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i, 101 - IB_POLL_SOFTIRQ); 102 - if (IS_ERR(comp->cq)) { 103 - comp->cq = NULL; 104 - goto cq_err; 105 - } 106 - } 107 81 108 82 INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, 109 83 iser_event_handler); 110 84 ib_register_event_handler(&device->event_handler); 111 85 return 0; 112 86 113 - cq_err: 114 - for (i = 0; i < device->comps_used; i++) { 115 - struct iser_comp *comp = &device->comps[i]; 116 - 117 - if (comp->cq) 118 - ib_free_cq(comp->cq); 119 - } 120 - ib_dealloc_pd(device->pd); 121 87 pd_err: 122 - kfree(device->comps); 123 - comps_err: 124 88 iser_err("failed to allocate an IB resource\n"); 125 89 return -1; 126 90 } ··· 95 131 */ 96 132 static void iser_free_device_ib_res(struct iser_device *device) 97 133 { 98 - int i; 99 - 100 - for (i = 0; i < device->comps_used; i++) { 101 - struct iser_comp *comp = &device->comps[i]; 102 - 103 - ib_free_cq(comp->cq); 
104 - comp->cq = NULL; 105 - } 106 - 107 134 ib_unregister_event_handler(&device->event_handler); 108 135 ib_dealloc_pd(device->pd); 109 136 110 - kfree(device->comps); 111 - device->comps = NULL; 112 137 device->pd = NULL; 113 138 } 114 139 ··· 240 287 struct ib_device *ib_dev; 241 288 struct ib_qp_init_attr init_attr; 242 289 int ret = -ENOMEM; 243 - int index, min_index = 0; 290 + unsigned int max_send_wr, cq_size; 244 291 245 292 BUG_ON(ib_conn->device == NULL); 246 293 247 294 device = ib_conn->device; 248 295 ib_dev = device->ib_device; 249 296 250 - memset(&init_attr, 0, sizeof init_attr); 297 + if (ib_conn->pi_support) 298 + max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; 299 + else 300 + max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; 301 + max_send_wr = min_t(unsigned int, max_send_wr, 302 + (unsigned int)ib_dev->attrs.max_qp_wr); 251 303 252 - mutex_lock(&ig.connlist_mutex); 253 - /* select the CQ with the minimal number of usages */ 254 - for (index = 0; index < device->comps_used; index++) { 255 - if (device->comps[index].active_qps < 256 - device->comps[min_index].active_qps) 257 - min_index = index; 304 + cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS; 305 + ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_SOFTIRQ); 306 + if (IS_ERR(ib_conn->cq)) { 307 + ret = PTR_ERR(ib_conn->cq); 308 + goto cq_err; 258 309 } 259 - ib_conn->comp = &device->comps[min_index]; 260 - ib_conn->comp->active_qps++; 261 - mutex_unlock(&ig.connlist_mutex); 262 - iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); 310 + ib_conn->cq_size = cq_size; 311 + 312 + memset(&init_attr, 0, sizeof(init_attr)); 263 313 264 314 init_attr.event_handler = iser_qp_event_callback; 265 315 init_attr.qp_context = (void *)ib_conn; 266 - init_attr.send_cq = ib_conn->comp->cq; 267 - init_attr.recv_cq = ib_conn->comp->cq; 316 + init_attr.send_cq = ib_conn->cq; 317 + init_attr.recv_cq = ib_conn->cq; 268 318 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 269 319 
init_attr.cap.max_send_sge = 2; 270 320 init_attr.cap.max_recv_sge = 1; 271 321 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 272 322 init_attr.qp_type = IB_QPT_RC; 273 - if (ib_conn->pi_support) { 274 - init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; 323 + init_attr.cap.max_send_wr = max_send_wr; 324 + if (ib_conn->pi_support) 275 325 init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; 276 - iser_conn->max_cmds = 277 - ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); 278 - } else { 279 - if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) { 280 - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; 281 - iser_conn->max_cmds = 282 - ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); 283 - } else { 284 - init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr; 285 - iser_conn->max_cmds = 286 - ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); 287 - iser_dbg("device %s supports max_send_wr %d\n", 288 - dev_name(&device->ib_device->dev), 289 - ib_dev->attrs.max_qp_wr); 290 - } 291 - } 326 + iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(max_send_wr - 1); 292 327 293 328 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 294 329 if (ret) 295 330 goto out_err; 296 331 297 332 ib_conn->qp = ib_conn->cma_id->qp; 298 - iser_info("setting conn %p cma_id %p qp %p\n", 333 + iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", 299 334 ib_conn, ib_conn->cma_id, 300 - ib_conn->cma_id->qp); 335 + ib_conn->cma_id->qp, max_send_wr); 301 336 return ret; 302 337 303 338 out_err: 304 - mutex_lock(&ig.connlist_mutex); 305 - ib_conn->comp->active_qps--; 306 - mutex_unlock(&ig.connlist_mutex); 339 + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); 340 + cq_err: 307 341 iser_err("unable to alloc mem or create resource, err %d\n", ret); 308 342 309 343 return ret; ··· 402 462 iser_conn, ib_conn->cma_id, ib_conn->qp); 403 463 404 464 if (ib_conn->qp != NULL) { 405 - mutex_lock(&ig.connlist_mutex); 406 - ib_conn->comp->active_qps--; 407 - mutex_unlock(&ig.connlist_mutex); 408 
465 rdma_destroy_qp(ib_conn->cma_id); 466 + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); 409 467 ib_conn->qp = NULL; 410 468 } 411 469
+36 -139
drivers/infiniband/ulp/isert/ib_isert.c
··· 24 24 25 25 #include "ib_isert.h" 26 26 27 - #define ISERT_MAX_CONN 8 28 - #define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) 29 - #define ISER_MAX_TX_CQ_LEN \ 30 - ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN) 31 - #define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ 32 - ISERT_MAX_CONN) 33 - 34 27 static int isert_debug_level; 35 28 module_param_named(debug_level, isert_debug_level, int, 0644); 36 29 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); ··· 75 82 } 76 83 } 77 84 78 - static struct isert_comp * 79 - isert_comp_get(struct isert_conn *isert_conn) 80 - { 81 - struct isert_device *device = isert_conn->device; 82 - struct isert_comp *comp; 83 - int i, min = 0; 84 - 85 - mutex_lock(&device_list_mutex); 86 - for (i = 0; i < device->comps_used; i++) 87 - if (device->comps[i].active_qps < 88 - device->comps[min].active_qps) 89 - min = i; 90 - comp = &device->comps[min]; 91 - comp->active_qps++; 92 - mutex_unlock(&device_list_mutex); 93 - 94 - isert_info("conn %p, using comp %p min_index: %d\n", 95 - isert_conn, comp, min); 96 - 97 - return comp; 98 - } 99 - 100 - static void 101 - isert_comp_put(struct isert_comp *comp) 102 - { 103 - mutex_lock(&device_list_mutex); 104 - comp->active_qps--; 105 - mutex_unlock(&device_list_mutex); 106 - } 107 - 108 85 static struct ib_qp * 109 86 isert_create_qp(struct isert_conn *isert_conn, 110 - struct isert_comp *comp, 111 87 struct rdma_cm_id *cma_id) 112 88 { 89 + u32 cq_size = ISERT_QP_MAX_REQ_DTOS + ISERT_QP_MAX_RECV_DTOS + 2; 113 90 struct isert_device *device = isert_conn->device; 91 + struct ib_device *ib_dev = device->ib_device; 114 92 struct ib_qp_init_attr attr; 115 - int ret; 93 + int ret, factor; 94 + 95 + isert_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_WORKQUEUE); 96 + if (IS_ERR(isert_conn->cq)) { 97 + isert_err("Unable to allocate cq\n"); 98 + ret = PTR_ERR(isert_conn->cq); 99 + return ERR_PTR(ret); 100 + } 101 + 
isert_conn->cq_size = cq_size; 116 102 117 103 memset(&attr, 0, sizeof(struct ib_qp_init_attr)); 118 104 attr.event_handler = isert_qp_event_callback; 119 105 attr.qp_context = isert_conn; 120 - attr.send_cq = comp->cq; 121 - attr.recv_cq = comp->cq; 106 + attr.send_cq = isert_conn->cq; 107 + attr.recv_cq = isert_conn->cq; 122 108 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; 123 109 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 124 - attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; 110 + factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, 111 + ISCSI_ISER_MAX_SG_TABLESIZE); 112 + attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; 125 113 attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; 126 114 attr.cap.max_recv_sge = 1; 127 115 attr.sq_sig_type = IB_SIGNAL_REQ_WR; ··· 113 139 ret = rdma_create_qp(cma_id, device->pd, &attr); 114 140 if (ret) { 115 141 isert_err("rdma_create_qp failed for cma_id %d\n", ret); 142 + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); 143 + 116 144 return ERR_PTR(ret); 117 145 } 118 146 119 147 return cma_id->qp; 120 - } 121 - 122 - static int 123 - isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) 124 - { 125 - struct isert_comp *comp; 126 - int ret; 127 - 128 - comp = isert_comp_get(isert_conn); 129 - isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id); 130 - if (IS_ERR(isert_conn->qp)) { 131 - ret = PTR_ERR(isert_conn->qp); 132 - goto err; 133 - } 134 - 135 - return 0; 136 - err: 137 - isert_comp_put(comp); 138 - return ret; 139 148 } 140 149 141 150 static int ··· 188 231 isert_conn->rx_descs = NULL; 189 232 } 190 233 191 - static void 192 - isert_free_comps(struct isert_device *device) 193 - { 194 - int i; 195 - 196 - for (i = 0; i < device->comps_used; i++) { 197 - struct isert_comp *comp = &device->comps[i]; 198 - 199 - if (comp->cq) 200 - ib_free_cq(comp->cq); 201 - } 202 - kfree(device->comps); 203 - } 204 - 205 - static int 206 - 
isert_alloc_comps(struct isert_device *device) 207 - { 208 - int i, max_cqe, ret = 0; 209 - 210 - device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(), 211 - device->ib_device->num_comp_vectors)); 212 - 213 - isert_info("Using %d CQs, %s supports %d vectors support " 214 - "pi_capable %d\n", 215 - device->comps_used, dev_name(&device->ib_device->dev), 216 - device->ib_device->num_comp_vectors, 217 - device->pi_capable); 218 - 219 - device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), 220 - GFP_KERNEL); 221 - if (!device->comps) 222 - return -ENOMEM; 223 - 224 - max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe); 225 - 226 - for (i = 0; i < device->comps_used; i++) { 227 - struct isert_comp *comp = &device->comps[i]; 228 - 229 - comp->device = device; 230 - comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i, 231 - IB_POLL_WORKQUEUE); 232 - if (IS_ERR(comp->cq)) { 233 - isert_err("Unable to allocate cq\n"); 234 - ret = PTR_ERR(comp->cq); 235 - comp->cq = NULL; 236 - goto out_cq; 237 - } 238 - } 239 - 240 - return 0; 241 - out_cq: 242 - isert_free_comps(device); 243 - return ret; 244 - } 245 - 246 234 static int 247 235 isert_create_device_ib_res(struct isert_device *device) 248 236 { ··· 198 296 ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); 199 297 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); 200 298 201 - ret = isert_alloc_comps(device); 202 - if (ret) 203 - goto out; 204 - 205 299 device->pd = ib_alloc_pd(ib_dev, 0); 206 300 if (IS_ERR(device->pd)) { 207 301 ret = PTR_ERR(device->pd); 208 302 isert_err("failed to allocate pd, device %p, ret=%d\n", 209 303 device, ret); 210 - goto out_cq; 304 + return ret; 211 305 } 212 306 213 307 /* Check signature cap */ ··· 211 313 IB_DEVICE_INTEGRITY_HANDOVER ? 
true : false; 212 314 213 315 return 0; 214 - 215 - out_cq: 216 - isert_free_comps(device); 217 - out: 218 - if (ret > 0) 219 - ret = -EINVAL; 220 - return ret; 221 316 } 222 317 223 318 static void ··· 219 328 isert_info("device %p\n", device); 220 329 221 330 ib_dealloc_pd(device->pd); 222 - isert_free_comps(device); 223 331 } 224 332 225 333 static void ··· 380 490 } 381 491 } 382 492 493 + static void 494 + isert_destroy_qp(struct isert_conn *isert_conn) 495 + { 496 + ib_destroy_qp(isert_conn->qp); 497 + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); 498 + } 499 + 383 500 static int 384 501 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 385 502 { ··· 427 530 428 531 isert_set_nego_params(isert_conn, &event->param.conn); 429 532 430 - ret = isert_conn_setup_qp(isert_conn, cma_id); 431 - if (ret) 533 + isert_conn->qp = isert_create_qp(isert_conn, cma_id); 534 + if (IS_ERR(isert_conn->qp)) { 535 + ret = PTR_ERR(isert_conn->qp); 432 536 goto out_conn_dev; 537 + } 433 538 434 539 ret = isert_login_post_recv(isert_conn); 435 540 if (ret) 436 - goto out_conn_dev; 541 + goto out_destroy_qp; 437 542 438 543 ret = isert_rdma_accept(isert_conn); 439 544 if (ret) 440 - goto out_conn_dev; 545 + goto out_destroy_qp; 441 546 442 547 mutex_lock(&isert_np->mutex); 443 548 list_add_tail(&isert_conn->node, &isert_np->accepted); ··· 447 548 448 549 return 0; 449 550 551 + out_destroy_qp: 552 + isert_destroy_qp(isert_conn); 450 553 out_conn_dev: 451 554 isert_device_put(device); 452 555 out_rsp_dma_map: ··· 473 572 !isert_conn->dev_removed) 474 573 rdma_destroy_id(isert_conn->cm_id); 475 574 476 - if (isert_conn->qp) { 477 - struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context; 478 - 479 - isert_comp_put(comp); 480 - ib_destroy_qp(isert_conn->qp); 481 - } 575 + if (isert_conn->qp) 576 + isert_destroy_qp(isert_conn); 482 577 483 578 if (isert_conn->login_req_buf) 484 579 isert_free_login_buf(isert_conn);
+4 -17
drivers/infiniband/ulp/isert/ib_isert.h
··· 63 63 (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ 64 64 sizeof(struct ib_cqe) + sizeof(bool))) 65 65 66 - #define ISCSI_ISER_SG_TABLESIZE 256 66 + /* Maximum support is 16MB I/O size */ 67 + #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 67 68 68 69 enum isert_desc_type { 69 70 ISCSI_TX_CONTROL, ··· 156 155 struct iser_tx_desc login_tx_desc; 157 156 struct rdma_cm_id *cm_id; 158 157 struct ib_qp *qp; 158 + struct ib_cq *cq; 159 + u32 cq_size; 159 160 struct isert_device *device; 160 161 struct mutex mutex; 161 162 struct kref kref; ··· 166 163 bool snd_w_inv; 167 164 wait_queue_head_t rem_wait; 168 165 bool dev_removed; 169 - }; 170 - 171 - #define ISERT_MAX_CQ 64 172 - 173 - /** 174 - * struct isert_comp - iSER completion context 175 - * 176 - * @device: pointer to device handle 177 - * @cq: completion queue 178 - * @active_qps: Number of active QPs attached 179 - * to completion context 180 - */ 181 - struct isert_comp { 182 - struct isert_device *device; 183 - struct ib_cq *cq; 184 - int active_qps; 185 166 }; 186 167 187 168 struct isert_device {
+23
drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
··· 118 118 * struct opa_vesw_info - OPA vnic switch information 119 119 * @fabric_id: 10-bit fabric id 120 120 * @vesw_id: 12-bit virtual ethernet switch id 121 + * @rsvd0: reserved bytes 121 122 * @def_port_mask: bitmask of default ports 123 + * @rsvd1: reserved bytes 122 124 * @pkey: partition key 125 + * @rsvd2: reserved bytes 123 126 * @u_mcast_dlid: unknown multicast dlid 124 127 * @u_ucast_dlid: array of unknown unicast dlids 128 + * @rsvd3: reserved bytes 125 129 * @rc: routing control 126 130 * @eth_mtu: Ethernet MTU 131 + * @rsvd4: reserved bytes 127 132 */ 128 133 struct opa_vesw_info { 129 134 __be16 fabric_id; ··· 155 150 * struct opa_per_veswport_info - OPA vnic per port information 156 151 * @port_num: port number 157 152 * @eth_link_status: current ethernet link state 153 + * @rsvd0: reserved bytes 158 154 * @base_mac_addr: base mac address 159 155 * @config_state: configured port state 160 156 * @oper_state: operational port state 161 157 * @max_mac_tbl_ent: max number of mac table entries 162 158 * @max_smac_ent: max smac entries in mac table 163 159 * @mac_tbl_digest: mac table digest 160 + * @rsvd1: reserved bytes 164 161 * @encap_slid: base slid for the port 165 162 * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets 166 163 * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets ··· 172 165 * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets 173 166 * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets 174 167 * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets 168 + * @rsvd2: reserved bytes 175 169 * @uc_macs_gen_count: generation count for unicast macs list 176 170 * @mc_macs_gen_count: generation count for multicast macs list 171 + * @rsvd3: reserved bytes 177 172 */ 178 173 struct opa_per_veswport_info { 179 174 __be32 port_num; ··· 303 294 * @rx_512_1023: received packet length is >=512 and < 1023 bytes 304 295 * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes 305 296 * 
@rx_1519_max: received packet length >= 1519 bytes 297 + * @reserved: reserved bytes 306 298 * 307 299 * All the above are counters of corresponding conditions. 308 300 */ ··· 357 347 * @veswport_num: virtual ethernet switch port number 358 348 * @tx_errors: transmit errors 359 349 * @rx_errors: receive errors 350 + * @rsvd0: reserved bytes 360 351 * @tx_smac_filt: smac filter errors 352 + * @rsvd1: reserved bytes 353 + * @rsvd2: reserved bytes 354 + * @rsvd3: reserved bytes 361 355 * @tx_dlid_zero: transmit packets with invalid dlid 356 + * @rsvd4: reserved bytes 362 357 * @tx_logic: other transmit errors 358 + * @rsvd5: reserved bytes 363 359 * @tx_drop_state: packet tansmission in non-forward port state 364 360 * @rx_bad_veswid: received packet with invalid vesw id 361 + * @rsvd6: reserved bytes 365 362 * @rx_runt: received ethernet packet with length < 64 bytes 366 363 * @rx_oversize: received ethernet packet with length > MTU size 364 + * @rsvd7: reserved bytes 367 365 * @rx_eth_down: received packets when interface is down 368 366 * @rx_drop_state: received packets in non-forwarding port state 369 367 * @rx_logic: other receive errors 368 + * @rsvd8: reserved bytes 369 + * @rsvd9: reserved bytes 370 370 * 371 371 * All the above are counters of corresponding error conditions. 
372 372 */ ··· 467 447 * struct opa_vnic_vema_mad - Generic VEMA MAD 468 448 * @mad_hdr: Generic MAD header 469 449 * @rmpp_hdr: RMPP header for vendor specific MADs 450 + * @reserved: reserved bytes 470 451 * @oui: Unique org identifier 471 452 * @data: MAD data 472 453 */ ··· 488 467 * @trap_num: Trap number 489 468 * @toggle_count: Notice toggle bit and count value 490 469 * @issuer_lid: Trap issuer's lid 470 + * @reserved: reserved bytes 491 471 * @issuer_gid: Issuer GID (only if Report method) 492 472 * @raw_data: Trap message body 493 473 */ ··· 509 487 * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap 510 488 * @mad_hdr: Generic MAD header 511 489 * @rmpp_hdr: RMPP header for vendor specific MADs 490 + * @reserved: reserved bytes 512 491 * @oui: Unique org identifier 513 492 * @notice: Notice structure 514 493 */
+13 -3
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 12 12 13 13 #include <linux/module.h> 14 14 #include <linux/rculist.h> 15 + #include <linux/random.h> 15 16 16 17 #include "rtrs-clt.h" 17 18 #include "rtrs-log.h" ··· 24 23 * leads to "false positives" failed reconnect attempts 25 24 */ 26 25 #define RTRS_RECONNECT_BACKOFF 1000 26 + /* 27 + * Wait for additional random time between 0 and 8 seconds 28 + * before starting to reconnect to avoid clients reconnecting 29 + * all at once in case of a major network outage 30 + */ 31 + #define RTRS_RECONNECT_SEED 8 27 32 28 33 MODULE_DESCRIPTION("RDMA Transport Client"); 29 34 MODULE_LICENSE("GPL"); ··· 313 306 */ 314 307 delay_ms = clt->reconnect_delay_sec * 1000; 315 308 queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 316 - msecs_to_jiffies(delay_ms)); 309 + msecs_to_jiffies(delay_ms + 310 + prandom_u32() % RTRS_RECONNECT_SEED)); 317 311 } else { 318 312 /* 319 313 * Error can happen just on establishing new connection, ··· 2511 2503 sess->stats->reconnects.fail_cnt++; 2512 2504 delay_ms = clt->reconnect_delay_sec * 1000; 2513 2505 queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 2514 - msecs_to_jiffies(delay_ms)); 2506 + msecs_to_jiffies(delay_ms + 2507 + prandom_u32() % 2508 + RTRS_RECONNECT_SEED)); 2515 2509 } 2516 2510 } 2517 2511 ··· 2982 2972 pr_err("Failed to create rtrs-client dev class\n"); 2983 2973 return PTR_ERR(rtrs_clt_dev_class); 2984 2974 } 2985 - rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0); 2975 + rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); 2986 2976 if (!rtrs_wq) { 2987 2977 class_destroy(rtrs_clt_dev_class); 2988 2978 return -ENOMEM;
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-srv.c
··· 2150 2150 err = PTR_ERR(rtrs_dev_class); 2151 2151 goto out_chunk_pool; 2152 2152 } 2153 - rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); 2153 + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); 2154 2154 if (!rtrs_wq) { 2155 2155 err = -ENOMEM; 2156 2156 goto out_dev_class;
+9 -11
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 869 869 870 870 static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) 871 871 { 872 - struct srpt_rdma_ch *ch = cq->cq_context; 872 + struct srpt_rdma_ch *ch = wc->qp->qp_context; 873 873 874 874 pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num, 875 875 wc->status); ··· 1322 1322 */ 1323 1323 static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) 1324 1324 { 1325 - struct srpt_rdma_ch *ch = cq->cq_context; 1325 + struct srpt_rdma_ch *ch = wc->qp->qp_context; 1326 1326 struct srpt_send_ioctx *ioctx = 1327 1327 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); 1328 1328 ··· 1683 1683 1684 1684 static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) 1685 1685 { 1686 - struct srpt_rdma_ch *ch = cq->cq_context; 1686 + struct srpt_rdma_ch *ch = wc->qp->qp_context; 1687 1687 struct srpt_recv_ioctx *ioctx = 1688 1688 container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); 1689 1689 ··· 1744 1744 */ 1745 1745 static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) 1746 1746 { 1747 - struct srpt_rdma_ch *ch = cq->cq_context; 1747 + struct srpt_rdma_ch *ch = wc->qp->qp_context; 1748 1748 struct srpt_send_ioctx *ioctx = 1749 1749 container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); 1750 1750 enum srpt_command_state state; ··· 1791 1791 goto out; 1792 1792 1793 1793 retry: 1794 - ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, 1794 + ch->cq = ib_cq_pool_get(sdev->device, ch->rq_size + sq_size, -1, 1795 1795 IB_POLL_WORKQUEUE); 1796 1796 if (IS_ERR(ch->cq)) { 1797 1797 ret = PTR_ERR(ch->cq); ··· 1799 1799 ch->rq_size + sq_size, ret); 1800 1800 goto out; 1801 1801 } 1802 + ch->cq_size = ch->rq_size + sq_size; 1802 1803 1803 1804 qp_init->qp_context = (void *)ch; 1804 1805 qp_init->event_handler ··· 1844 1843 if (retry) { 1845 1844 pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n", 1846 1845 sq_size, ret); 1847 - ib_free_cq(ch->cq); 1846 + 
ib_cq_pool_put(ch->cq, ch->cq_size); 1848 1847 sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); 1849 1848 goto retry; 1850 1849 } else { ··· 1870 1869 1871 1870 err_destroy_cq: 1872 1871 ch->qp = NULL; 1873 - ib_free_cq(ch->cq); 1872 + ib_cq_pool_put(ch->cq, ch->cq_size); 1874 1873 goto out; 1875 1874 } 1876 1875 1877 1876 static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) 1878 1877 { 1879 1878 ib_destroy_qp(ch->qp); 1880 - ib_free_cq(ch->cq); 1879 + ib_cq_pool_put(ch->cq, ch->cq_size); 1881 1880 } 1882 1881 1883 1882 /** ··· 2156 2155 struct srpt_tpg *stpg; 2157 2156 2158 2157 WARN_ON_ONCE(irqs_disabled()); 2159 - 2160 - if (WARN_ON(!sdev || !req)) 2161 - return -EINVAL; 2162 2158 2163 2159 it_iu_len = be32_to_cpu(req->req_it_iu_len); 2164 2160
+1
drivers/infiniband/ulp/srpt/ib_srpt.h
··· 300 300 } rdma_cm; 301 301 }; 302 302 struct ib_cq *cq; 303 + u32 cq_size; 303 304 struct ib_cqe zw_cqe; 304 305 struct rcu_head rcu; 305 306 struct kref kref;
+9 -2
drivers/net/ethernet/mellanox/mlx5/core/alloc.c
··· 299 299 } 300 300 EXPORT_SYMBOL_GPL(mlx5_fill_page_array); 301 301 302 - void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) 302 + void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) 303 303 { 304 304 int i; 305 305 306 + WARN_ON(perm & 0xfc); 306 307 for (i = 0; i < buf->npages; i++) 307 - pas[i] = cpu_to_be64(buf->frags[i].map); 308 + pas[i] = cpu_to_be64(buf->frags[i].map | perm); 309 + } 310 + EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm); 311 + 312 + void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) 313 + { 314 + mlx5_fill_page_frag_array_perm(buf, pas, 0); 308 315 } 309 316 EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
+1
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
··· 1577 1577 static bool counter_is_valid(u32 action) 1578 1578 { 1579 1579 return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | 1580 + MLX5_FLOW_CONTEXT_ACTION_ALLOW | 1580 1581 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); 1581 1582 } 1582 1583
+7
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
··· 226 226 227 227 int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) 228 228 { 229 + unsigned char *dev_addr = priv->netdev->dev_addr; 229 230 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 230 231 u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; 231 232 struct mlx5i_priv *ipriv = priv->ppriv; 232 233 void *addr_path; 234 + int qpn = 0; 233 235 int ret = 0; 234 236 void *qpc; 237 + 238 + if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) { 239 + qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3]; 240 + MLX5_SET(create_qp_in, in, input_qpn, qpn); 241 + } 235 242 236 243 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 237 244 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
+3
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 557 557 if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) 558 558 MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); 559 559 560 + if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) 561 + MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); 562 + 560 563 return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); 561 564 } 562 565
+5 -4
include/linux/mlx5/device.h
··· 276 276 MLX5_MKEY_MASK_RW = 1ull << 20, 277 277 MLX5_MKEY_MASK_A = 1ull << 21, 278 278 MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, 279 - MLX5_MKEY_MASK_FREE = 1ull << 29, 279 + MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE = 1ull << 25, 280 + MLX5_MKEY_MASK_FREE = 1ull << 29, 281 + MLX5_MKEY_MASK_RELAXED_ORDERING_READ = 1ull << 47, 280 282 }; 281 283 282 284 enum { ··· 1009 1007 MLX5_MKEY_REMOTE_INVAL = 1 << 24, 1010 1008 MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, 1011 1009 MLX5_MKEY_BSF_EN = 1 << 30, 1012 - MLX5_MKEY_LEN64 = 1 << 31, 1013 1010 }; 1014 1011 1015 1012 struct mlx5_mkey_seg { ··· 1362 1361 MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap) 1363 1362 1364 1363 #define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\ 1365 - MLX5_GET(device_virtio_emulation_cap, \ 1364 + MLX5_GET(virtio_emulation_cap, \ 1366 1365 (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) 1367 1366 1368 1367 #define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\ 1369 - MLX5_GET64(device_virtio_emulation_cap, \ 1368 + MLX5_GET64(virtio_emulation_cap, \ 1370 1369 (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) 1371 1370 1372 1371 #define MLX5_CAP_IPSEC(mdev, cap)\
+2
include/linux/mlx5/driver.h
··· 975 975 void mlx5_unregister_debugfs(void); 976 976 977 977 void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); 978 + void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); 978 979 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); 979 980 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 980 981 unsigned int *irqn); ··· 1058 1057 enum { 1059 1058 MLX5_INTERFACE_PROTOCOL_IB = 0, 1060 1059 MLX5_INTERFACE_PROTOCOL_ETH = 1, 1060 + MLX5_INTERFACE_PROTOCOL_VDPA = 2, 1061 1061 }; 1062 1062 1063 1063 struct mlx5_interface {
+113 -16
include/linux/mlx5/mlx5_ifc.h
··· 93 93 94 94 enum { 95 95 MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, 96 + MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, 96 97 MLX5_OBJ_TYPE_MKEY = 0xff01, 97 98 MLX5_OBJ_TYPE_QP = 0xff02, 98 99 MLX5_OBJ_TYPE_PSV = 0xff03, ··· 986 985 u8 user_unaffiliated_events[4][0x40]; 987 986 }; 988 987 989 - struct mlx5_ifc_device_virtio_emulation_cap_bits { 990 - u8 reserved_at_0[0x20]; 988 + struct mlx5_ifc_virtio_emulation_cap_bits { 989 + u8 desc_tunnel_offload_type[0x1]; 990 + u8 eth_frame_offload_type[0x1]; 991 + u8 virtio_version_1_0[0x1]; 992 + u8 device_features_bits_mask[0xd]; 993 + u8 event_mode[0x8]; 994 + u8 virtio_queue_type[0x8]; 991 995 992 - u8 reserved_at_20[0x13]; 996 + u8 max_tunnel_desc[0x10]; 997 + u8 reserved_at_30[0x3]; 993 998 u8 log_doorbell_stride[0x5]; 994 999 u8 reserved_at_38[0x3]; 995 1000 u8 log_doorbell_bar_size[0x5]; 996 1001 997 1002 u8 doorbell_bar_offset[0x40]; 998 1003 999 - u8 reserved_at_80[0x780]; 1004 + u8 max_emulated_devices[0x8]; 1005 + u8 max_num_virtio_queues[0x18]; 1006 + 1007 + u8 reserved_at_a0[0x60]; 1008 + 1009 + u8 umem_1_buffer_param_a[0x20]; 1010 + 1011 + u8 umem_1_buffer_param_b[0x20]; 1012 + 1013 + u8 umem_2_buffer_param_a[0x20]; 1014 + 1015 + u8 umem_2_buffer_param_b[0x20]; 1016 + 1017 + u8 umem_3_buffer_param_a[0x20]; 1018 + 1019 + u8 umem_3_buffer_param_b[0x20]; 1020 + 1021 + u8 reserved_at_1c0[0x640]; 1000 1022 }; 1001 1023 1002 1024 enum { ··· 1244 1220 1245 1221 u8 max_sgl_for_optimized_performance[0x8]; 1246 1222 u8 log_max_cq_sz[0x8]; 1247 - u8 reserved_at_d0[0xb]; 1223 + u8 relaxed_ordering_write_umr[0x1]; 1224 + u8 relaxed_ordering_read_umr[0x1]; 1225 + u8 reserved_at_d2[0x7]; 1226 + u8 virtio_net_device_emualtion_manager[0x1]; 1227 + u8 virtio_blk_device_emualtion_manager[0x1]; 1248 1228 u8 log_max_cq[0x5]; 1249 1229 1250 1230 u8 log_max_eq_sz[0x8]; ··· 1424 1396 u8 bf[0x1]; 1425 1397 u8 driver_version[0x1]; 1426 1398 u8 pad_tx_eth_packet[0x1]; 1427 - u8 reserved_at_263[0x8]; 1399 + u8 reserved_at_263[0x3]; 1400 + u8 
mkey_by_name[0x1]; 1401 + u8 reserved_at_267[0x4]; 1402 + 1428 1403 u8 log_bf_reg_size[0x5]; 1429 1404 1430 1405 u8 reserved_at_270[0x8]; ··· 2984 2953 struct mlx5_ifc_fpga_cap_bits fpga_cap; 2985 2954 struct mlx5_ifc_tls_cap_bits tls_cap; 2986 2955 struct mlx5_ifc_device_mem_cap_bits device_mem_cap; 2987 - struct mlx5_ifc_device_virtio_emulation_cap_bits virtio_emulation_cap; 2956 + struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; 2988 2957 u8 reserved_at_0[0x8000]; 2989 2958 }; 2990 2959 ··· 3333 3302 }; 3334 3303 3335 3304 struct mlx5_ifc_rqtc_bits { 3336 - u8 reserved_at_0[0xa0]; 3305 + u8 reserved_at_0[0xa0]; 3337 3306 3338 - u8 reserved_at_a0[0x10]; 3339 - u8 rqt_max_size[0x10]; 3307 + u8 reserved_at_a0[0x5]; 3308 + u8 list_q_type[0x3]; 3309 + u8 reserved_at_a8[0x8]; 3310 + u8 rqt_max_size[0x10]; 3340 3311 3341 - u8 reserved_at_c0[0x10]; 3342 - u8 rqt_actual_size[0x10]; 3312 + u8 rq_vhca_id_format[0x1]; 3313 + u8 reserved_at_c1[0xf]; 3314 + u8 rqt_actual_size[0x10]; 3343 3315 3344 - u8 reserved_at_e0[0x6a0]; 3316 + u8 reserved_at_e0[0x6a0]; 3345 3317 3346 3318 struct mlx5_ifc_rq_num_bits rq_num[]; 3347 3319 }; ··· 7126 7092 7127 7093 struct mlx5_ifc_destroy_mkey_in_bits { 7128 7094 u8 opcode[0x10]; 7129 - u8 reserved_at_10[0x10]; 7095 + u8 uid[0x10]; 7130 7096 7131 7097 u8 reserved_at_20[0x10]; 7132 7098 u8 op_mod[0x10]; ··· 7757 7723 u8 reserved_at_20[0x10]; 7758 7724 u8 op_mod[0x10]; 7759 7725 7760 - u8 reserved_at_40[0x40]; 7726 + u8 reserved_at_40[0x8]; 7727 + u8 input_qpn[0x18]; 7761 7728 7729 + u8 reserved_at_60[0x20]; 7762 7730 u8 opt_param_mask[0x20]; 7763 7731 7764 7732 u8 ece[0x20]; ··· 7824 7788 7825 7789 struct mlx5_ifc_create_mkey_in_bits { 7826 7790 u8 opcode[0x10]; 7827 - u8 reserved_at_10[0x10]; 7791 + u8 uid[0x10]; 7828 7792 7829 7793 u8 reserved_at_20[0x10]; 7830 7794 u8 op_mod[0x10]; ··· 10382 10346 struct mlx5_ifc_umem_bits umem; 10383 10347 }; 10384 10348 10349 + struct mlx5_ifc_create_umem_out_bits { 10350 + u8 
status[0x8]; 10351 + u8 reserved_at_8[0x18]; 10352 + 10353 + u8 syndrome[0x20]; 10354 + 10355 + u8 reserved_at_40[0x8]; 10356 + u8 umem_id[0x18]; 10357 + 10358 + u8 reserved_at_60[0x20]; 10359 + }; 10360 + 10361 + struct mlx5_ifc_destroy_umem_in_bits { 10362 + u8 opcode[0x10]; 10363 + u8 uid[0x10]; 10364 + 10365 + u8 reserved_at_20[0x10]; 10366 + u8 op_mod[0x10]; 10367 + 10368 + u8 reserved_at_40[0x8]; 10369 + u8 umem_id[0x18]; 10370 + 10371 + u8 reserved_at_60[0x20]; 10372 + }; 10373 + 10374 + struct mlx5_ifc_destroy_umem_out_bits { 10375 + u8 status[0x8]; 10376 + u8 reserved_at_8[0x18]; 10377 + 10378 + u8 syndrome[0x20]; 10379 + 10380 + u8 reserved_at_40[0x40]; 10381 + }; 10382 + 10385 10383 struct mlx5_ifc_create_uctx_in_bits { 10386 10384 u8 opcode[0x10]; 10387 10385 u8 reserved_at_10[0x10]; ··· 10426 10356 u8 reserved_at_40[0x40]; 10427 10357 10428 10358 struct mlx5_ifc_uctx_bits uctx; 10359 + }; 10360 + 10361 + struct mlx5_ifc_create_uctx_out_bits { 10362 + u8 status[0x8]; 10363 + u8 reserved_at_8[0x18]; 10364 + 10365 + u8 syndrome[0x20]; 10366 + 10367 + u8 reserved_at_40[0x10]; 10368 + u8 uid[0x10]; 10369 + 10370 + u8 reserved_at_60[0x20]; 10429 10371 }; 10430 10372 10431 10373 struct mlx5_ifc_destroy_uctx_in_bits { ··· 10451 10369 u8 uid[0x10]; 10452 10370 10453 10371 u8 reserved_at_60[0x20]; 10372 + }; 10373 + 10374 + struct mlx5_ifc_destroy_uctx_out_bits { 10375 + u8 status[0x8]; 10376 + u8 reserved_at_8[0x18]; 10377 + 10378 + u8 syndrome[0x20]; 10379 + 10380 + u8 reserved_at_40[0x40]; 10454 10381 }; 10455 10382 10456 10383 struct mlx5_ifc_create_sw_icm_in_bits { ··· 10776 10685 u8 auth_state[0x2]; 10777 10686 u8 reserved_at_44[0x4]; 10778 10687 u8 hw_offset_record_number[0x18]; 10688 + }; 10689 + 10690 + enum { 10691 + MLX5_MTT_PERM_READ = 1 << 0, 10692 + MLX5_MTT_PERM_WRITE = 1 << 1, 10693 + MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE, 10779 10694 }; 10780 10695 10781 10696 #endif /* MLX5_IFC_H */
+2 -29
include/rdma/ib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2010 Intel Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 - #if !defined(_RDMA_IB_H) 6 + #ifndef _RDMA_IB_H 34 7 #define _RDMA_IB_H 35 8 36 9 #include <linux/types.h>
+2 -29
include/rdma/ib_addr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 4 * Copyright (c) 2005 Intel Corporation. All rights reserved. 4 - * 5 - * This software is available to you under a choice of one of two 6 - * licenses. You may choose to be licensed under the terms of the GNU 7 - * General Public License (GPL) Version 2, available from the file 8 - * COPYING in the main directory of this source tree, or the 9 - * OpenIB.org BSD license below: 10 - * 11 - * Redistribution and use in source and binary forms, with or 12 - * without modification, are permitted provided that the following 13 - * conditions are met: 14 - * 15 - * - Redistributions of source code must retain the above 16 - * copyright notice, this list of conditions and the following 17 - * disclaimer. 18 - * 19 - * - Redistributions in binary form must reproduce the above 20 - * copyright notice, this list of conditions and the following 21 - * disclaimer in the documentation and/or other materials 22 - * provided with the distribution. 23 - * 24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 - * SOFTWARE. 32 5 */ 33 6 34 - #if !defined(IB_ADDR_H) 7 + #ifndef IB_ADDR_H 35 8 #define IB_ADDR_H 36 9 37 10 #include <linux/in.h>
+1 -28
include/rdma/ib_cache.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 4 * Copyright (c) 2005 Intel Corporation. All rights reserved. 4 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 5 - * 6 - * This software is available to you under a choice of one of two 7 - * licenses. You may choose to be licensed under the terms of the GNU 8 - * General Public License (GPL) Version 2, available from the file 9 - * COPYING in the main directory of this source tree, or the 10 - * OpenIB.org BSD license below: 11 - * 12 - * Redistribution and use in source and binary forms, with or 13 - * without modification, are permitted provided that the following 14 - * conditions are met: 15 - * 16 - * - Redistributions of source code must retain the above 17 - * copyright notice, this list of conditions and the following 18 - * disclaimer. 19 - * 20 - * - Redistributions in binary form must reproduce the above 21 - * copyright notice, this list of conditions and the following 22 - * disclaimer in the documentation and/or other materials 23 - * provided with the distribution. 24 - * 25 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 - * SOFTWARE. 33 6 */ 34 7 35 8 #ifndef _IB_CACHE_H
+1
include/rdma/ib_cm.h
··· 6 6 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 7 * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. 8 8 */ 9 + 9 10 #ifndef IB_CM_H 10 11 #define IB_CM_H 11 12
+1 -43
include/rdma/ib_hdrs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright(c) 2016 - 2018 Intel Corporation. 3 - * 4 - * This file is provided under a dual BSD/GPLv2 license. When using or 5 - * redistributing this file, you may do so under either license. 6 - * 7 - * GPL LICENSE SUMMARY 8 - * 9 - * This program is free software; you can redistribute it and/or modify 10 - * it under the terms of version 2 of the GNU General Public License as 11 - * published by the Free Software Foundation. 12 - * 13 - * This program is distributed in the hope that it will be useful, but 14 - * WITHOUT ANY WARRANTY; without even the implied warranty of 15 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 - * General Public License for more details. 17 - * 18 - * BSD LICENSE 19 - * 20 - * Redistribution and use in source and binary forms, with or without 21 - * modification, are permitted provided that the following conditions 22 - * are met: 23 - * 24 - * - Redistributions of source code must retain the above copyright 25 - * notice, this list of conditions and the following disclaimer. 26 - * - Redistributions in binary form must reproduce the above copyright 27 - * notice, this list of conditions and the following disclaimer in 28 - * the documentation and/or other materials provided with the 29 - * distribution. 30 - * - Neither the name of Intel Corporation nor the names of its 31 - * contributors may be used to endorse or promote products derived 32 - * from this software without specific prior written permission. 33 - * 34 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 - * 46 4 */ 47 5 48 6 #ifndef IB_HDRS_H
+2 -29
include/rdma/ib_mad.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 3 4 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 4 5 * Copyright (c) 2004 Intel Corporation. All rights reserved. 5 6 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 6 7 * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. 7 - * 8 - * This software is available to you under a choice of one of two 9 - * licenses. You may choose to be licensed under the terms of the GNU 10 - * General Public License (GPL) Version 2, available from the file 11 - * COPYING in the main directory of this source tree, or the 12 - * OpenIB.org BSD license below: 13 - * 14 - * Redistribution and use in source and binary forms, with or 15 - * without modification, are permitted provided that the following 16 - * conditions are met: 17 - * 18 - * - Redistributions of source code must retain the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer. 21 - * 22 - * - Redistributions in binary form must reproduce the above 23 - * copyright notice, this list of conditions and the following 24 - * disclaimer in the documentation and/or other materials 25 - * provided with the distribution. 26 - * 27 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 - * SOFTWARE. 35 8 */ 36 9 37 - #if !defined(IB_MAD_H) 10 + #ifndef IB_MAD_H 38 11 #define IB_MAD_H 39 12 40 13 #include <linux/list.h>
+2 -29
include/rdma/ib_marshall.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 - #if !defined(IB_USER_MARSHALL_H) 6 + #ifndef IB_USER_MARSHALL_H 34 7 #define IB_USER_MARSHALL_H 35 8 36 9 #include <rdma/ib_verbs.h>
+1 -28
include/rdma/ib_pack.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef IB_PACK_H
+2 -29
include/rdma/ib_pma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. 3 4 * All rights reserved. 4 5 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 5 - * 6 - * This software is available to you under a choice of one of two 7 - * licenses. You may choose to be licensed under the terms of the GNU 8 - * General Public License (GPL) Version 2, available from the file 9 - * COPYING in the main directory of this source tree, or the 10 - * OpenIB.org BSD license below: 11 - * 12 - * Redistribution and use in source and binary forms, with or 13 - * without modification, are permitted provided that the following 14 - * conditions are met: 15 - * 16 - * - Redistributions of source code must retain the above 17 - * copyright notice, this list of conditions and the following 18 - * disclaimer. 19 - * 20 - * - Redistributions in binary form must reproduce the above 21 - * copyright notice, this list of conditions and the following 22 - * disclaimer in the documentation and/or other materials 23 - * provided with the distribution. 24 - * 25 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 - * SOFTWARE. 33 6 */ 34 7 35 - #if !defined(IB_PMA_H) 8 + #ifndef IB_PMA_H 36 9 #define IB_PMA_H 37 10 38 11 #include <rdma/ib_mad.h>
+1 -28
include/rdma/ib_sa.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 4 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 4 5 * Copyright (c) 2006 Intel Corporation. All rights reserved. 5 - * 6 - * This software is available to you under a choice of one of two 7 - * licenses. You may choose to be licensed under the terms of the GNU 8 - * General Public License (GPL) Version 2, available from the file 9 - * COPYING in the main directory of this source tree, or the 10 - * OpenIB.org BSD license below: 11 - * 12 - * Redistribution and use in source and binary forms, with or 13 - * without modification, are permitted provided that the following 14 - * conditions are met: 15 - * 16 - * - Redistributions of source code must retain the above 17 - * copyright notice, this list of conditions and the following 18 - * disclaimer. 19 - * 20 - * - Redistributions in binary form must reproduce the above 21 - * copyright notice, this list of conditions and the following 22 - * disclaimer in the documentation and/or other materials 23 - * provided with the distribution. 24 - * 25 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 - * SOFTWARE. 33 6 */ 34 7 35 8 #ifndef IB_SA_H
+2 -29
include/rdma/ib_smi.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 3 4 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 4 5 * Copyright (c) 2004 Intel Corporation. All rights reserved. 5 6 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 6 7 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 7 - * 8 - * This software is available to you under a choice of one of two 9 - * licenses. You may choose to be licensed under the terms of the GNU 10 - * General Public License (GPL) Version 2, available from the file 11 - * COPYING in the main directory of this source tree, or the 12 - * OpenIB.org BSD license below: 13 - * 14 - * Redistribution and use in source and binary forms, with or 15 - * without modification, are permitted provided that the following 16 - * conditions are met: 17 - * 18 - * - Redistributions of source code must retain the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer. 21 - * 22 - * - Redistributions in binary form must reproduce the above 23 - * copyright notice, this list of conditions and the following 24 - * disclaimer in the documentation and/or other materials 25 - * provided with the distribution. 26 - * 27 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 - * SOFTWARE. 35 8 */ 36 9 37 - #if !defined(IB_SMI_H) 10 + #ifndef IB_SMI_H 38 11 #define IB_SMI_H 39 12 40 13 #include <rdma/ib_mad.h>
+1 -28
include/rdma/ib_umem.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2007 Cisco Systems. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef IB_UMEM_H
+1 -28
include/rdma/ib_umem_odp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2014 Mellanox Technologies. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef IB_UMEM_ODP_H
+31 -69
include/rdma/ib_verbs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 3 4 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. ··· 7 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 8 7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 9 8 * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. 10 - * 11 - * This software is available to you under a choice of one of two 12 - * licenses. You may choose to be licensed under the terms of the GNU 13 - * General Public License (GPL) Version 2, available from the file 14 - * COPYING in the main directory of this source tree, or the 15 - * OpenIB.org BSD license below: 16 - * 17 - * Redistribution and use in source and binary forms, with or 18 - * without modification, are permitted provided that the following 19 - * conditions are met: 20 - * 21 - * - Redistributions of source code must retain the above 22 - * copyright notice, this list of conditions and the following 23 - * disclaimer. 24 - * 25 - * - Redistributions in binary form must reproduce the above 26 - * copyright notice, this list of conditions and the following 27 - * disclaimer in the documentation and/or other materials 28 - * provided with the distribution. 29 - * 30 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 31 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 33 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 34 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 35 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 - * SOFTWARE. 
38 9 */ 39 10 40 - #if !defined(IB_VERBS_H) 11 + #ifndef IB_VERBS_H 41 12 #define IB_VERBS_H 42 13 43 14 #include <linux/types.h> ··· 48 75 struct ib_uqp_object; 49 76 struct ib_usrq_object; 50 77 struct ib_uwq_object; 78 + struct rdma_cm_id; 51 79 52 80 extern struct workqueue_struct *ib_wq; 53 81 extern struct workqueue_struct *ib_comp_wq; ··· 1540 1566 struct ib_device *device; 1541 1567 atomic_t usecnt; /* count all exposed resources */ 1542 1568 struct inode *inode; 1543 - 1544 - struct mutex tgt_qp_mutex; 1545 - struct list_head tgt_qp_list; 1569 + struct rw_semaphore tgt_qps_rwsem; 1570 + struct xarray tgt_qps; 1546 1571 }; 1547 1572 1548 1573 struct ib_ah { ··· 2243 2270 struct ib_odp_counters { 2244 2271 atomic64_t faults; 2245 2272 atomic64_t invalidations; 2273 + atomic64_t prefetch; 2246 2274 }; 2247 2275 2248 2276 struct ib_counters { ··· 2450 2476 struct ib_pd *pd, struct ib_udata *udata); 2451 2477 int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); 2452 2478 struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, 2453 - u32 max_num_sg, struct ib_udata *udata); 2479 + u32 max_num_sg); 2454 2480 struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd, 2455 2481 u32 max_num_data_sg, 2456 2482 u32 max_num_meta_sg); ··· 2467 2493 int (*dealloc_mw)(struct ib_mw *mw); 2468 2494 int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); 2469 2495 int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); 2470 - struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, 2471 - struct ib_udata *udata); 2472 - int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); 2496 + int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); 2497 + void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); 2473 2498 struct ib_flow *(*create_flow)(struct ib_qp *qp, 2474 2499 struct ib_flow_attr *flow_attr, 2475 2500 int domain, struct ib_udata *udata); ··· 2512 2539 struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct 
ib_dm *dm, 2513 2540 struct ib_dm_mr_attr *attr, 2514 2541 struct uverbs_attr_bundle *attrs); 2515 - struct ib_counters *(*create_counters)( 2516 - struct ib_device *device, struct uverbs_attr_bundle *attrs); 2517 - int (*destroy_counters)(struct ib_counters *counters); 2542 + int (*create_counters)(struct ib_counters *counters, 2543 + struct uverbs_attr_bundle *attrs); 2544 + void (*destroy_counters)(struct ib_counters *counters); 2518 2545 int (*read_counters)(struct ib_counters *counters, 2519 2546 struct ib_counters_read_attr *counters_read_attr, 2520 2547 struct uverbs_attr_bundle *attrs); ··· 2554 2581 /** 2555 2582 * Allows rdma drivers to add their own restrack attributes. 2556 2583 */ 2557 - int (*fill_res_entry)(struct sk_buff *msg, 2558 - struct rdma_restrack_entry *entry); 2584 + int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); 2585 + int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr); 2586 + int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); 2587 + int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq); 2588 + int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); 2589 + int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp); 2590 + int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); 2559 2591 2560 2592 /* Device lifecycle callbacks */ 2561 2593 /* ··· 2615 2637 * Allows rdma drivers to add their own restrack attributes 2616 2638 * dumped via 'rdma stat' iproute2 command. 
2617 2639 */ 2618 - int (*fill_stat_entry)(struct sk_buff *msg, 2619 - struct rdma_restrack_entry *entry); 2640 + int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); 2641 + 2642 + /* query driver for its ucontext properties */ 2643 + int (*query_ucontext)(struct ib_ucontext *context, 2644 + struct uverbs_attr_bundle *attrs); 2620 2645 2621 2646 DECLARE_RDMA_OBJ_SIZE(ib_ah); 2647 + DECLARE_RDMA_OBJ_SIZE(ib_counters); 2622 2648 DECLARE_RDMA_OBJ_SIZE(ib_cq); 2623 2649 DECLARE_RDMA_OBJ_SIZE(ib_pd); 2624 2650 DECLARE_RDMA_OBJ_SIZE(ib_srq); 2625 2651 DECLARE_RDMA_OBJ_SIZE(ib_ucontext); 2652 + DECLARE_RDMA_OBJ_SIZE(ib_xrcd); 2626 2653 }; 2627 2654 2628 2655 struct ib_core_device { ··· 4246 4263 return ib_dereg_mr_user(mr, NULL); 4247 4264 } 4248 4265 4249 - struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, 4250 - u32 max_num_sg, struct ib_udata *udata); 4251 - 4252 - static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, 4253 - enum ib_mr_type mr_type, u32 max_num_sg) 4254 - { 4255 - return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); 4256 - } 4266 + struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 4267 + u32 max_num_sg); 4257 4268 4258 4269 struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, 4259 4270 u32 max_num_data_sg, ··· 4298 4321 */ 4299 4322 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); 4300 4323 4301 - /** 4302 - * ib_alloc_xrcd - Allocates an XRC domain. 4303 - * @device: The device on which to allocate the XRC domain. 4304 - * @caller: Module name for kernel consumers 4305 - */ 4306 - struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); 4307 - #define ib_alloc_xrcd(device) \ 4308 - __ib_alloc_xrcd((device), KBUILD_MODNAME) 4309 - 4310 - /** 4311 - * ib_dealloc_xrcd - Deallocates an XRC domain. 4312 - * @xrcd: The XRC domain to deallocate. 
4313 - * @udata: Valid user data or NULL for kernel object 4314 - */ 4315 - int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 4324 + struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, 4325 + struct inode *inode, struct ib_udata *udata); 4326 + int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); 4316 4327 4317 4328 static inline int ib_check_mr_access(int flags) 4318 4329 { ··· 4382 4417 int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); 4383 4418 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, 4384 4419 u32 wq_attr_mask); 4385 - struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, 4386 - struct ib_rwq_ind_table_init_attr* 4387 - wq_ind_table_init_attr); 4388 4420 int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); 4389 4421 4390 4422 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+2 -28
include/rdma/iw_cm.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 3 4 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 4 - * 5 - * This software is available to you under a choice of one of two 6 - * licenses. You may choose to be licensed under the terms of the GNU 7 - * General Public License (GPL) Version 2, available from the file 8 - * COPYING in the main directory of this source tree, or the 9 - * OpenIB.org BSD license below: 10 - * 11 - * Redistribution and use in source and binary forms, with or 12 - * without modification, are permitted provided that the following 13 - * conditions are met: 14 - * 15 - * - Redistributions of source code must retain the above 16 - * copyright notice, this list of conditions and the following 17 - * disclaimer. 18 - * 19 - * - Redistributions in binary form must reproduce the above 20 - * copyright notice, this list of conditions and the following 21 - * disclaimer in the documentation and/or other materials 22 - * provided with the distribution. 23 - * 24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 - * SOFTWARE. 32 5 */ 6 + 33 7 #ifndef IW_CM_H 34 8 #define IW_CM_H 35 9
+2 -28
include/rdma/iw_portmap.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2014 Intel Corporation. All rights reserved. 3 4 * Copyright (c) 2014 Chelsio, Inc. All rights reserved. 4 - * 5 - * This software is available to you under a choice of one of two 6 - * licenses. You may choose to be licensed under the terms of the GNU 7 - * General Public License (GPL) Version 2, available from the file 8 - * COPYING in the main directory of this source tree, or the 9 - * OpenIB.org BSD license below: 10 - * 11 - * Redistribution and use in source and binary forms, with or 12 - * without modification, are permitted provided that the following 13 - * conditions are met: 14 - * 15 - * - Redistributions of source code must retain the above 16 - * copyright notice, this list of conditions and the following 17 - * disclaimer. 18 - * 19 - * - Redistributions in binary form must reproduce the above 20 - * copyright notice, this list of conditions and the following 21 - * disclaimer in the documentation and/or other materials 22 - * provided with the distribution. 23 - * 24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 - * SOFTWARE. 32 5 */ 6 + 33 7 #ifndef _IW_PORTMAP_H 34 8 #define _IW_PORTMAP_H 35 9
+1 -43
include/rdma/opa_addr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright(c) 2017 Intel Corporation. 3 - * 4 - * This file is provided under a dual BSD/GPLv2 license. When using or 5 - * redistributing this file, you may do so under either license. 6 - * 7 - * GPL LICENSE SUMMARY 8 - * 9 - * This program is free software; you can redistribute it and/or modify 10 - * it under the terms of version 2 of the GNU General Public License as 11 - * published by the Free Software Foundation. 12 - * 13 - * This program is distributed in the hope that it will be useful, but 14 - * WITHOUT ANY WARRANTY; without even the implied warranty of 15 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 - * General Public License for more details. 17 - * 18 - * BSD LICENSE 19 - * 20 - * Redistribution and use in source and binary forms, with or without 21 - * modification, are permitted provided that the following conditions 22 - * are met: 23 - * 24 - * - Redistributions of source code must retain the above copyright 25 - * notice, this list of conditions and the following disclaimer. 26 - * - Redistributions in binary form must reproduce the above copyright 27 - * notice, this list of conditions and the following disclaimer in 28 - * the documentation and/or other materials provided with the 29 - * distribution. 30 - * - Neither the name of Intel Corporation nor the names of its 31 - * contributors may be used to endorse or promote products derived 32 - * from this software without specific prior written permission. 33 - * 34 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 - * 46 4 */ 47 5 48 6 #ifndef OPA_ADDR_H
+2 -29
include/rdma/opa_port_info.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2014-2020 Intel Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 - #if !defined(OPA_PORT_INFO_H) 6 + #ifndef OPA_PORT_INFO_H 34 7 #define OPA_PORT_INFO_H 35 8 36 9 #include <rdma/opa_smi.h>
+2 -29
include/rdma/opa_smi.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2014 Intel Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 - #if !defined(OPA_SMI_H) 6 + #ifndef OPA_SMI_H 34 7 #define OPA_SMI_H 35 8 36 9 #include <rdma/ib_mad.h>
+4 -45
include/rdma/opa_vnic.h
··· 1 - #ifndef _OPA_VNIC_H 2 - #define _OPA_VNIC_H 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 3 2 /* 4 3 * Copyright(c) 2017 - 2020 Intel Corporation. 5 - * 6 - * This file is provided under a dual BSD/GPLv2 license. When using or 7 - * redistributing this file, you may do so under either license. 8 - * 9 - * GPL LICENSE SUMMARY 10 - * 11 - * This program is free software; you can redistribute it and/or modify 12 - * it under the terms of version 2 of the GNU General Public License as 13 - * published by the Free Software Foundation. 14 - * 15 - * This program is distributed in the hope that it will be useful, but 16 - * WITHOUT ANY WARRANTY; without even the implied warranty of 17 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 - * General Public License for more details. 19 - * 20 - * BSD LICENSE 21 - * 22 - * Redistribution and use in source and binary forms, with or without 23 - * modification, are permitted provided that the following conditions 24 - * are met: 25 - * 26 - * - Redistributions of source code must retain the above copyright 27 - * notice, this list of conditions and the following disclaimer. 28 - * - Redistributions in binary form must reproduce the above copyright 29 - * notice, this list of conditions and the following disclaimer in 30 - * the documentation and/or other materials provided with the 31 - * distribution. 32 - * - Neither the name of Intel Corporation nor the names of its 33 - * contributors may be used to endorse or promote products derived 34 - * from this software without specific prior written permission. 35 - * 36 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 40 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 - * 48 4 */ 5 + 6 + #ifndef _OPA_VNIC_H 7 + #define _OPA_VNIC_H 49 8 50 9 /* 51 10 * This file contains Intel Omni-Path (OPA) Virtual Network Interface
+2 -29
include/rdma/rdma_cm.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 4 * Copyright (c) 2005 Intel Corporation. All rights reserved. 4 - * 5 - * This software is available to you under a choice of one of two 6 - * licenses. You may choose to be licensed under the terms of the GNU 7 - * General Public License (GPL) Version 2, available from the file 8 - * COPYING in the main directory of this source tree, or the 9 - * OpenIB.org BSD license below: 10 - * 11 - * Redistribution and use in source and binary forms, with or 12 - * without modification, are permitted provided that the following 13 - * conditions are met: 14 - * 15 - * - Redistributions of source code must retain the above 16 - * copyright notice, this list of conditions and the following 17 - * disclaimer. 18 - * 19 - * - Redistributions in binary form must reproduce the above 20 - * copyright notice, this list of conditions and the following 21 - * disclaimer in the documentation and/or other materials 22 - * provided with the distribution. 23 - * 24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 - * SOFTWARE. 32 5 */ 33 6 34 - #if !defined(RDMA_CM_H) 7 + #ifndef RDMA_CM_H 35 8 #define RDMA_CM_H 36 9 37 10 #include <linux/socket.h>
+2 -29
include/rdma/rdma_cm_ib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2006 Intel Corporation. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 - #if !defined(RDMA_CM_IB_H) 6 + #ifndef RDMA_CM_IB_H 34 7 #define RDMA_CM_IB_H 35 8 36 9 #include <rdma/rdma_cm.h>
+1 -1
include/rdma/rdma_netlink.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 + 2 3 #ifndef _RDMA_NETLINK_H 3 4 #define _RDMA_NETLINK_H 4 - 5 5 6 6 #include <linux/netlink.h> 7 7 #include <uapi/rdma/rdma_netlink.h>
+4 -46
include/rdma/rdma_vt.h
··· 1 - #ifndef DEF_RDMA_VT_H 2 - #define DEF_RDMA_VT_H 3 - 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 4 2 /* 5 3 * Copyright(c) 2016 - 2019 Intel Corporation. 6 - * 7 - * This file is provided under a dual BSD/GPLv2 license. When using or 8 - * redistributing this file, you may do so under either license. 9 - * 10 - * GPL LICENSE SUMMARY 11 - * 12 - * This program is free software; you can redistribute it and/or modify 13 - * it under the terms of version 2 of the GNU General Public License as 14 - * published by the Free Software Foundation. 15 - * 16 - * This program is distributed in the hope that it will be useful, but 17 - * WITHOUT ANY WARRANTY; without even the implied warranty of 18 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 - * General Public License for more details. 20 - * 21 - * BSD LICENSE 22 - * 23 - * Redistribution and use in source and binary forms, with or without 24 - * modification, are permitted provided that the following conditions 25 - * are met: 26 - * 27 - * - Redistributions of source code must retain the above copyright 28 - * notice, this list of conditions and the following disclaimer. 29 - * - Redistributions in binary form must reproduce the above copyright 30 - * notice, this list of conditions and the following disclaimer in 31 - * the documentation and/or other materials provided with the 32 - * distribution. 33 - * - Neither the name of Intel Corporation nor the names of its 34 - * contributors may be used to endorse or promote products derived 35 - * from this software without specific prior written permission. 36 - * 37 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 41 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 - * 49 4 */ 5 + 6 + #ifndef DEF_RDMA_VT_H 7 + #define DEF_RDMA_VT_H 50 8 51 9 /* 52 10 * Structure that low level drivers will populate in order to register with the
+5 -50
include/rdma/rdmavt_cq.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 2 + /* 3 + * Copyright(c) 2016 - 2018 Intel Corporation. 4 + */ 5 + 1 6 #ifndef DEF_RDMAVT_INCCQ_H 2 7 #define DEF_RDMAVT_INCCQ_H 3 - 4 - /* 5 - * 6 - * This file is provided under a dual BSD/GPLv2 license. When using or 7 - * redistributing this file, you may do so under either license. 8 - * 9 - * GPL LICENSE SUMMARY 10 - * 11 - * Copyright(c) 2016 - 2018 Intel Corporation. 12 - * 13 - * This program is free software; you can redistribute it and/or modify 14 - * it under the terms of version 2 of the GNU General Public License as 15 - * published by the Free Software Foundation. 16 - * 17 - * This program is distributed in the hope that it will be useful, but 18 - * WITHOUT ANY WARRANTY; without even the implied warranty of 19 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 - * General Public License for more details. 21 - * 22 - * BSD LICENSE 23 - * 24 - * Copyright(c) 2015 Intel Corporation. 25 - * 26 - * Redistribution and use in source and binary forms, with or without 27 - * modification, are permitted provided that the following conditions 28 - * are met: 29 - * 30 - * - Redistributions of source code must retain the above copyright 31 - * notice, this list of conditions and the following disclaimer. 32 - * - Redistributions in binary form must reproduce the above copyright 33 - * notice, this list of conditions and the following disclaimer in 34 - * the documentation and/or other materials provided with the 35 - * distribution. 36 - * - Neither the name of Intel Corporation nor the names of its 37 - * contributors may be used to endorse or promote products derived 38 - * from this software without specific prior written permission. 
39 - * 40 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 41 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 42 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 43 - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 44 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 45 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 46 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 47 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 48 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 49 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 50 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 51 - * 52 - */ 53 8 54 9 #include <linux/kthread.h> 55 10 #include <rdma/ib_user_verbs.h>
+4 -46
include/rdma/rdmavt_mr.h
··· 1 - #ifndef DEF_RDMAVT_INCMR_H 2 - #define DEF_RDMAVT_INCMR_H 3 - 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 4 2 /* 5 3 * Copyright(c) 2016 Intel Corporation. 6 - * 7 - * This file is provided under a dual BSD/GPLv2 license. When using or 8 - * redistributing this file, you may do so under either license. 9 - * 10 - * GPL LICENSE SUMMARY 11 - * 12 - * This program is free software; you can redistribute it and/or modify 13 - * it under the terms of version 2 of the GNU General Public License as 14 - * published by the Free Software Foundation. 15 - * 16 - * This program is distributed in the hope that it will be useful, but 17 - * WITHOUT ANY WARRANTY; without even the implied warranty of 18 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 - * General Public License for more details. 20 - * 21 - * BSD LICENSE 22 - * 23 - * Redistribution and use in source and binary forms, with or without 24 - * modification, are permitted provided that the following conditions 25 - * are met: 26 - * 27 - * - Redistributions of source code must retain the above copyright 28 - * notice, this list of conditions and the following disclaimer. 29 - * - Redistributions in binary form must reproduce the above copyright 30 - * notice, this list of conditions and the following disclaimer in 31 - * the documentation and/or other materials provided with the 32 - * distribution. 33 - * - Neither the name of Intel Corporation nor the names of its 34 - * contributors may be used to endorse or promote products derived 35 - * from this software without specific prior written permission. 36 - * 37 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 41 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 - * 49 4 */ 5 + 6 + #ifndef DEF_RDMAVT_INCMR_H 7 + #define DEF_RDMAVT_INCMR_H 50 8 51 9 /* 52 10 * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
+4 -46
include/rdma/rdmavt_qp.h
··· 1 - #ifndef DEF_RDMAVT_INCQP_H 2 - #define DEF_RDMAVT_INCQP_H 3 - 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 4 2 /* 5 3 * Copyright(c) 2016 - 2020 Intel Corporation. 6 - * 7 - * This file is provided under a dual BSD/GPLv2 license. When using or 8 - * redistributing this file, you may do so under either license. 9 - * 10 - * GPL LICENSE SUMMARY 11 - * 12 - * This program is free software; you can redistribute it and/or modify 13 - * it under the terms of version 2 of the GNU General Public License as 14 - * published by the Free Software Foundation. 15 - * 16 - * This program is distributed in the hope that it will be useful, but 17 - * WITHOUT ANY WARRANTY; without even the implied warranty of 18 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 - * General Public License for more details. 20 - * 21 - * BSD LICENSE 22 - * 23 - * Redistribution and use in source and binary forms, with or without 24 - * modification, are permitted provided that the following conditions 25 - * are met: 26 - * 27 - * - Redistributions of source code must retain the above copyright 28 - * notice, this list of conditions and the following disclaimer. 29 - * - Redistributions in binary form must reproduce the above copyright 30 - * notice, this list of conditions and the following disclaimer in 31 - * the documentation and/or other materials provided with the 32 - * distribution. 33 - * - Neither the name of Intel Corporation nor the names of its 34 - * contributors may be used to endorse or promote products derived 35 - * from this software without specific prior written permission. 36 - * 37 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 41 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 - * 49 4 */ 5 + 6 + #ifndef DEF_RDMAVT_INCQP_H 7 + #define DEF_RDMAVT_INCQP_H 50 8 51 9 #include <rdma/rdma_vt.h> 52 10 #include <rdma/ib_pack.h>
+2 -28
include/rdma/uverbs_ioctl.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef _UVERBS_IOCTL_ ··· 625 652 struct ib_udata ucore; 626 653 struct ib_uverbs_file *ufile; 627 654 struct ib_ucontext *context; 655 + struct ib_uobject *uobject; 628 656 DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); 629 657 struct uverbs_attr attrs[]; 630 658 };
+1 -28
include/rdma/uverbs_named_ioctl.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef _UVERBS_NAMED_IOCTL_
+15 -28
include/rdma/uverbs_std_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 
31 4 */ 32 5 33 6 #ifndef _UVERBS_STD_TYPES__ ··· 81 108 struct uverbs_attr_bundle *attrs) 82 109 { 83 110 rdma_alloc_abort_uobject(uobj, attrs, false); 111 + } 112 + 113 + static inline void uobj_finalize_uobj_create(struct ib_uobject *uobj, 114 + struct uverbs_attr_bundle *attrs) 115 + { 116 + /* 117 + * Tell the core code that the write() handler has completed 118 + * initializing the object and that the core should commit or 119 + * abort this object based upon the return code from the write() 120 + * method. Similar to what uverbs_finalize_uobj_create() does for 121 + * ioctl() 122 + */ 123 + WARN_ON(attrs->uobject); 124 + attrs->uobject = uobj; 84 125 } 85 126 86 127 static inline struct ib_uobject *
+1 -28
include/rdma/uverbs_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 1 2 /* 2 3 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 31 4 */ 32 5 33 6 #ifndef _UVERBS_TYPES_
+14 -1
include/uapi/rdma/efa-abi.h
··· 1 1 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ 2 2 /* 3 - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef EFA_ABI_USER_H ··· 20 20 * hex bit offset of the field. 21 21 */ 22 22 23 + enum { 24 + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH = 1 << 0, 25 + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR = 1 << 1, 26 + }; 27 + 28 + struct efa_ibv_alloc_ucontext_cmd { 29 + __u32 comp_mask; 30 + __u8 reserved_20[4]; 31 + }; 32 + 23 33 enum efa_ibv_user_cmds_supp_udata { 24 34 EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0, 25 35 EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1, ··· 41 31 __u16 sub_cqs_per_cq; 42 32 __u16 inline_buf_size; 43 33 __u32 max_llq_size; /* bytes */ 34 + __u16 max_tx_batch; /* units of 64 bytes */ 35 + __u16 min_sq_wr; 36 + __u8 reserved_a0[4]; 44 37 }; 45 38 46 39 struct efa_ibv_alloc_pd_resp {
+15
include/uapi/rdma/ib_user_ioctl_cmds.h
··· 69 69 UVERBS_METHOD_INFO_HANDLES, 70 70 UVERBS_METHOD_QUERY_PORT, 71 71 UVERBS_METHOD_GET_CONTEXT, 72 + UVERBS_METHOD_QUERY_CONTEXT, 72 73 }; 73 74 74 75 enum uverbs_attrs_invoke_write_cmd_attr_ids { ··· 86 85 enum uverbs_attrs_get_context_attr_ids { 87 86 UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, 88 87 UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, 88 + }; 89 + 90 + enum uverbs_attrs_query_context_attr_ids { 91 + UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, 92 + UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, 89 93 }; 90 94 91 95 enum uverbs_attrs_create_cq_cmd_attr_ids { ··· 248 242 UVERBS_METHOD_DM_MR_REG, 249 243 UVERBS_METHOD_MR_DESTROY, 250 244 UVERBS_METHOD_ADVISE_MR, 245 + UVERBS_METHOD_QUERY_MR, 251 246 }; 252 247 253 248 enum uverbs_attrs_mr_destroy_ids { ··· 260 253 UVERBS_ATTR_ADVISE_MR_ADVICE, 261 254 UVERBS_ATTR_ADVISE_MR_FLAGS, 262 255 UVERBS_ATTR_ADVISE_MR_SGE_LIST, 256 + }; 257 + 258 + enum uverbs_attrs_query_mr_cmd_attr_ids { 259 + UVERBS_ATTR_QUERY_MR_HANDLE, 260 + UVERBS_ATTR_QUERY_MR_RESP_LKEY, 261 + UVERBS_ATTR_QUERY_MR_RESP_RKEY, 262 + UVERBS_ATTR_QUERY_MR_RESP_LENGTH, 263 + UVERBS_ATTR_QUERY_MR_RESP_IOVA, 263 264 }; 264 265 265 266 enum uverbs_attrs_create_counters_cmd_attr_ids {
+15 -1
include/uapi/rdma/mlx5_user_ioctl_cmds.h
··· 228 228 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, 229 229 }; 230 230 231 + enum mlx5_ib_device_query_context_attrs { 232 + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT), 233 + }; 234 + 231 235 #define MLX5_IB_DW_MATCH_PARAM 0x80 232 236 233 237 struct mlx5_ib_match_params { ··· 263 259 MLX5_IB_ATTR_CREATE_FLOW_FLAGS, 264 260 }; 265 261 266 - enum mlx5_ib_destoy_flow_attrs { 262 + enum mlx5_ib_destroy_flow_attrs { 267 263 MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), 268 264 }; 269 265 ··· 288 284 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, 289 285 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, 290 286 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, 287 + }; 288 + 289 + enum mlx5_ib_query_pd_attrs { 290 + MLX5_IB_ATTR_QUERY_PD_HANDLE = (1U << UVERBS_ID_NS_SHIFT), 291 + MLX5_IB_ATTR_QUERY_PD_RESP_PDN, 292 + }; 293 + 294 + enum mlx5_ib_pd_methods { 295 + MLX5_IB_METHOD_PD_QUERY = (1U << UVERBS_ID_NS_SHIFT), 296 + 291 297 }; 292 298 293 299 #endif
+7 -3
include/uapi/rdma/qedr-abi.h
··· 39 39 40 40 /* user kernel communication data structures. */ 41 41 enum qedr_alloc_ucontext_flags { 42 - QEDR_ALLOC_UCTX_RESERVED = 1 << 0, 43 - QEDR_ALLOC_UCTX_DB_REC = 1 << 1 42 + QEDR_ALLOC_UCTX_EDPM_MODE = 1 << 0, 43 + QEDR_ALLOC_UCTX_DB_REC = 1 << 1, 44 + QEDR_SUPPORT_DPM_SIZES = 1 << 2, 44 45 }; 45 46 46 47 struct qedr_alloc_ucontext_req { ··· 51 50 52 51 #define QEDR_LDPM_MAX_SIZE (8192) 53 52 #define QEDR_EDPM_TRANS_SIZE (64) 53 + #define QEDR_EDPM_MAX_SIZE (ROCE_REQ_MAX_INLINE_DATA_SIZE) 54 54 55 55 enum qedr_rdma_dpm_type { 56 56 QEDR_DPM_TYPE_NONE = 0, 57 57 QEDR_DPM_TYPE_ROCE_ENHANCED = 1 << 0, 58 58 QEDR_DPM_TYPE_ROCE_LEGACY = 1 << 1, 59 59 QEDR_DPM_TYPE_IWARP_LEGACY = 1 << 2, 60 - QEDR_DPM_TYPE_RESERVED = 1 << 3, 60 + QEDR_DPM_TYPE_ROCE_EDPM_MODE = 1 << 3, 61 61 QEDR_DPM_SIZES_SET = 1 << 4, 62 62 }; 63 63 ··· 79 77 __u16 ldpm_limit_size; 80 78 __u8 edpm_trans_size; 81 79 __u8 reserved; 80 + __u16 edpm_limit_size; 81 + __u8 padding[6]; 82 82 }; 83 83 84 84 struct qedr_alloc_pd_ureq {
+9
include/uapi/rdma/rdma_netlink.h
··· 287 287 288 288 RDMA_NLDEV_CMD_STAT_DEL, 289 289 290 + RDMA_NLDEV_CMD_RES_QP_GET_RAW, 291 + 292 + RDMA_NLDEV_CMD_RES_CQ_GET_RAW, 293 + 294 + RDMA_NLDEV_CMD_RES_MR_GET_RAW, 295 + 290 296 RDMA_NLDEV_NUM_OPS 291 297 }; 292 298 ··· 531 525 */ 532 526 RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */ 533 527 528 + RDMA_NLDEV_ATTR_RES_RAW, /* binary */ 529 + 534 530 /* 535 531 * Always the end 536 532 */ ··· 569 561 */ 570 562 enum rdma_nl_counter_mask { 571 563 RDMA_COUNTER_MASK_QP_TYPE = 1, 564 + RDMA_COUNTER_MASK_PID = 1 << 1, 572 565 }; 573 566 #endif /* _UAPI_RDMA_NETLINK_H */
+1 -1
include/uapi/rdma/rdma_user_ioctl.h
··· 43 43 44 44 /* 45 45 * General blocks assignments 46 - * It is closed on purpose do not expose it it user space 46 + * It is closed on purpose - do not expose it to user space 47 47 * #define MAD_CMD_BASE 0x00 48 48 * #define HFI1_CMD_BAS 0xE0 49 49 */