···969969 file->async_file = NULL;970970 kref_init(&file->ref);971971 mutex_init(&file->mutex);972972+ mutex_init(&file->cleanup_mutex);972973973974 filp->private_data = file;974975 kobject_get(&dev->kobj);···995994{996995 struct ib_uverbs_file *file = filp->private_data;997996 struct ib_uverbs_device *dev = file->device;998998- struct ib_ucontext *ucontext = NULL;997997+998998+ mutex_lock(&file->cleanup_mutex);999999+ if (file->ucontext) {10001000+ ib_uverbs_cleanup_ucontext(file, file->ucontext);10011001+ file->ucontext = NULL;10021002+ }10031003+ mutex_unlock(&file->cleanup_mutex);999100410001005 mutex_lock(&file->device->lists_mutex);10011001- ucontext = file->ucontext;10021002- file->ucontext = NULL;10031006 if (!file->is_closed) {10041007 list_del(&file->list);10051008 file->is_closed = 1;10061009 }10071010 mutex_unlock(&file->device->lists_mutex);10081008- if (ucontext)10091009- ib_uverbs_cleanup_ucontext(file, ucontext);1010101110111012 if (file->async_file)10121013 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);···12221219 mutex_lock(&uverbs_dev->lists_mutex);12231220 while (!list_empty(&uverbs_dev->uverbs_file_list)) {12241221 struct ib_ucontext *ucontext;12251225-12261222 file = list_first_entry(&uverbs_dev->uverbs_file_list,12271223 struct ib_uverbs_file, list);12281224 file->is_closed = 1;12291229- ucontext = file->ucontext;12301225 list_del(&file->list);12311231- file->ucontext = NULL;12321226 kref_get(&file->ref);12331227 mutex_unlock(&uverbs_dev->lists_mutex);12341234- /* We must release the mutex before going ahead and calling12351235- * disassociate_ucontext. 
disassociate_ucontext might end up12361236- * indirectly calling uverbs_close, for example due to freeing12371237- * the resources (e.g mmput).12381238- */12281228+12391229 ib_uverbs_event_handler(&file->event_handler, &event);12301230+12311231+ mutex_lock(&file->cleanup_mutex);12321232+ ucontext = file->ucontext;12331233+ file->ucontext = NULL;12341234+ mutex_unlock(&file->cleanup_mutex);12351235+12361236+ /* At this point ib_uverbs_close cannot be running12371237+ * ib_uverbs_cleanup_ucontext12381238+ */12401239 if (ucontext) {12401240+ /* We must release the mutex before going ahead and12411241+ * calling disassociate_ucontext. disassociate_ucontext12421242+ * might end up indirectly calling uverbs_close,12431243+ * for example due to freeing the resources12441244+ * (e.g mmput).12451245+ */12411246 ib_dev->disassociate_ucontext(ucontext);12421247 ib_uverbs_cleanup_ucontext(file, ucontext);12431248 }
-1
drivers/infiniband/hw/hfi1/Kconfig
···33 depends on X86_64 && INFINIBAND_RDMAVT44 select MMU_NOTIFIER55 select CRC3266- default m76 ---help---87 This is a low-level driver for Intel OPA Gen1 adapter.98config HFI1_DEBUG_SDMA_ORDER
+1-1
drivers/infiniband/hw/hfi1/file_ops.c
···225225 sizeof(struct hfi1_base_info));226226 break;227227 case HFI1_IOCTL_CREDIT_UPD:228228- if (uctxt && uctxt->sc)228228+ if (uctxt)229229 sc_return_credits(uctxt->sc);230230 break;231231
+2-2
drivers/infiniband/hw/mlx4/cq.c
···288288 if (cq->resize_buf)289289 return -EBUSY;290290291291- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);291291+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);292292 if (!cq->resize_buf)293293 return -ENOMEM;294294···316316 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))317317 return -EFAULT;318318319319- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);319319+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);320320 if (!cq->resize_buf)321321 return -ENOMEM;322322
+197-1
drivers/infiniband/hw/mlx4/main.c
···20492049 &dev_attr_board_id20502050};2051205120522052+struct diag_counter {20532053+ const char *name;20542054+ u32 offset;20552055+};20562056+20572057+#define DIAG_COUNTER(_name, _offset) \20582058+ { .name = #_name, .offset = _offset }20592059+20602060+static const struct diag_counter diag_basic[] = {20612061+ DIAG_COUNTER(rq_num_lle, 0x00),20622062+ DIAG_COUNTER(sq_num_lle, 0x04),20632063+ DIAG_COUNTER(rq_num_lqpoe, 0x08),20642064+ DIAG_COUNTER(sq_num_lqpoe, 0x0C),20652065+ DIAG_COUNTER(rq_num_lpe, 0x18),20662066+ DIAG_COUNTER(sq_num_lpe, 0x1C),20672067+ DIAG_COUNTER(rq_num_wrfe, 0x20),20682068+ DIAG_COUNTER(sq_num_wrfe, 0x24),20692069+ DIAG_COUNTER(sq_num_mwbe, 0x2C),20702070+ DIAG_COUNTER(sq_num_bre, 0x34),20712071+ DIAG_COUNTER(sq_num_rire, 0x44),20722072+ DIAG_COUNTER(rq_num_rire, 0x48),20732073+ DIAG_COUNTER(sq_num_rae, 0x4C),20742074+ DIAG_COUNTER(rq_num_rae, 0x50),20752075+ DIAG_COUNTER(sq_num_roe, 0x54),20762076+ DIAG_COUNTER(sq_num_tree, 0x5C),20772077+ DIAG_COUNTER(sq_num_rree, 0x64),20782078+ DIAG_COUNTER(rq_num_rnr, 0x68),20792079+ DIAG_COUNTER(sq_num_rnr, 0x6C),20802080+ DIAG_COUNTER(rq_num_oos, 0x100),20812081+ DIAG_COUNTER(sq_num_oos, 0x104),20822082+};20832083+20842084+static const struct diag_counter diag_ext[] = {20852085+ DIAG_COUNTER(rq_num_dup, 0x130),20862086+ DIAG_COUNTER(sq_num_to, 0x134),20872087+};20882088+20892089+static const struct diag_counter diag_device_only[] = {20902090+ DIAG_COUNTER(num_cqovf, 0x1A0),20912091+ DIAG_COUNTER(rq_num_udsdprd, 0x118),20922092+};20932093+20942094+static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,20952095+ u8 port_num)20962096+{20972097+ struct mlx4_ib_dev *dev = to_mdev(ibdev);20982098+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;20992099+21002100+ if (!diag[!!port_num].name)21012101+ return NULL;21022102+21032103+ return rdma_alloc_hw_stats_struct(diag[!!port_num].name,21042104+ diag[!!port_num].num_counters,21052105+ 
RDMA_HW_STATS_DEFAULT_LIFESPAN);21062106+}21072107+21082108+static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,21092109+ struct rdma_hw_stats *stats,21102110+ u8 port, int index)21112111+{21122112+ struct mlx4_ib_dev *dev = to_mdev(ibdev);21132113+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;21142114+ u32 hw_value[ARRAY_SIZE(diag_device_only) +21152115+ ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};21162116+ int ret;21172117+ int i;21182118+21192119+ ret = mlx4_query_diag_counters(dev->dev,21202120+ MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,21212121+ diag[!!port].offset, hw_value,21222122+ diag[!!port].num_counters, port);21232123+21242124+ if (ret)21252125+ return ret;21262126+21272127+ for (i = 0; i < diag[!!port].num_counters; i++)21282128+ stats->value[i] = hw_value[i];21292129+21302130+ return diag[!!port].num_counters;21312131+}21322132+21332133+static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,21342134+ const char ***name,21352135+ u32 **offset,21362136+ u32 *num,21372137+ bool port)21382138+{21392139+ u32 num_counters;21402140+21412141+ num_counters = ARRAY_SIZE(diag_basic);21422142+21432143+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)21442144+ num_counters += ARRAY_SIZE(diag_ext);21452145+21462146+ if (!port)21472147+ num_counters += ARRAY_SIZE(diag_device_only);21482148+21492149+ *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);21502150+ if (!*name)21512151+ return -ENOMEM;21522152+21532153+ *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);21542154+ if (!*offset)21552155+ goto err_name;21562156+21572157+ *num = num_counters;21582158+21592159+ return 0;21602160+21612161+err_name:21622162+ kfree(*name);21632163+ return -ENOMEM;21642164+}21652165+21662166+static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,21672167+ const char **name,21682168+ u32 *offset,21692169+ bool port)21702170+{21712171+ int i;21722172+ int j;21732173+21742174+ for (i = 0, j = 0; i < 
ARRAY_SIZE(diag_basic); i++, j++) {21752175+ name[i] = diag_basic[i].name;21762176+ offset[i] = diag_basic[i].offset;21772177+ }21782178+21792179+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {21802180+ for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {21812181+ name[j] = diag_ext[i].name;21822182+ offset[j] = diag_ext[i].offset;21832183+ }21842184+ }21852185+21862186+ if (!port) {21872187+ for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {21882188+ name[j] = diag_device_only[i].name;21892189+ offset[j] = diag_device_only[i].offset;21902190+ }21912191+ }21922192+}21932193+21942194+static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)21952195+{21962196+ struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;21972197+ int i;21982198+ int ret;21992199+ bool per_port = !!(ibdev->dev->caps.flags2 &22002200+ MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);22012201+22022202+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {22032203+ /* i == 1 means we are building port counters */22042204+ if (i && !per_port)22052205+ continue;22062206+22072207+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,22082208+ &diag[i].offset,22092209+ &diag[i].num_counters, i);22102210+ if (ret)22112211+ goto err_alloc;22122212+22132213+ mlx4_ib_fill_diag_counters(ibdev, diag[i].name,22142214+ diag[i].offset, i);22152215+ }22162216+22172217+ ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;22182218+ ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;22192219+22202220+ return 0;22212221+22222222+err_alloc:22232223+ if (i) {22242224+ kfree(diag[i - 1].name);22252225+ kfree(diag[i - 1].offset);22262226+ }22272227+22282228+ return ret;22292229+}22302230+22312231+static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)22322232+{22332233+ int i;22342234+22352235+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {22362236+ kfree(ibdev->diag_counters[i].offset);22372237+ kfree(ibdev->diag_counters[i].name);22382238+ }22392239+}22402240+20522241#define 
MLX4_IB_INVALID_MAC ((u64)-1)20532242static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,20542243 struct net_device *dev,···27412552 for (j = 1; j <= ibdev->dev->caps.num_ports; j++)27422553 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);2743255427442744- if (ib_register_device(&ibdev->ib_dev, NULL))25552555+ if (mlx4_ib_alloc_diag_counters(ibdev))27452556 goto err_steer_free_bitmap;25572557+25582558+ if (ib_register_device(&ibdev->ib_dev, NULL))25592559+ goto err_diag_counters;2746256027472561 if (mlx4_ib_mad_init(ibdev))27482562 goto err_reg;···2811261928122620err_reg:28132621 ib_unregister_device(&ibdev->ib_dev);26222622+26232623+err_diag_counters:26242624+ mlx4_ib_diag_cleanup(ibdev);2814262528152626err_steer_free_bitmap:28162627 kfree(ibdev->ib_uc_qpns_bitmap);···29182723 mlx4_ib_close_sriov(ibdev);29192724 mlx4_ib_mad_cleanup(ibdev);29202725 ib_unregister_device(&ibdev->ib_dev);27262726+ mlx4_ib_diag_cleanup(ibdev);29212727 if (ibdev->iboe.nb.notifier_call) {29222728 if (unregister_netdevice_notifier(&ibdev->iboe.nb))29232729 pr_warn("failure unregistering notifier\n");
···11config INFINIBAND_RDMAVT22 tristate "RDMA verbs transport library"33 depends on 64BIT44- default m54 ---help---65 This is a common software verbs provider for RDMA networks.
+24
drivers/infiniband/sw/rxe/Kconfig
···11+config RDMA_RXE22+ tristate "Software RDMA over Ethernet (RoCE) driver"33+ depends on INET && PCI && INFINIBAND44+ depends on NET_UDP_TUNNEL55+ ---help---66+ This driver implements the InfiniBand RDMA transport over77+ the Linux network stack. It enables a system with a88+ standard Ethernet adapter to interoperate with a RoCE99+ adapter or with another system running the RXE driver.1010+ Documentation on InfiniBand and RoCE can be downloaded at1111+ www.infinibandta.org and www.openfabrics.org. (See also1212+ siw which is a similar software driver for iWARP.)1313+1414+ The driver is split into two layers, one interfaces with the1515+ Linux RDMA stack and implements a kernel or user space1616+ verbs API. The user space verbs API requires a support1717+ library named librxe which is loaded by the generic user1818+ space verbs API, libibverbs. The other layer interfaces1919+ with the Linux network stack at layer 3.2020+2121+ To configure and work with soft-RoCE driver please use the2222+ following wiki page under "configure Soft-RoCE (RXE)" section:2323+2424+ https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+3737+MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");3838+MODULE_DESCRIPTION("Soft RDMA transport");3939+MODULE_LICENSE("Dual BSD/GPL");4040+MODULE_VERSION("0.2");4141+4242+/* free resources for all ports on a device */4343+static void rxe_cleanup_ports(struct rxe_dev *rxe)4444+{4545+ kfree(rxe->port.pkey_tbl);4646+ rxe->port.pkey_tbl = NULL;4747+4848+}4949+5050+/* free resources for a rxe device all objects created for this device must5151+ * have been destroyed5252+ */5353+static void rxe_cleanup(struct rxe_dev *rxe)5454+{5555+ rxe_pool_cleanup(&rxe->uc_pool);5656+ rxe_pool_cleanup(&rxe->pd_pool);5757+ rxe_pool_cleanup(&rxe->ah_pool);5858+ rxe_pool_cleanup(&rxe->srq_pool);5959+ rxe_pool_cleanup(&rxe->qp_pool);6060+ rxe_pool_cleanup(&rxe->cq_pool);6161+ rxe_pool_cleanup(&rxe->mr_pool);6262+ rxe_pool_cleanup(&rxe->mw_pool);6363+ rxe_pool_cleanup(&rxe->mc_grp_pool);6464+ rxe_pool_cleanup(&rxe->mc_elem_pool);6565+6666+ rxe_cleanup_ports(rxe);6767+}6868+6969+/* called when all references have been dropped */7070+void rxe_release(struct kref *kref)7171+{7272+ struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);7373+7474+ rxe_cleanup(rxe);7575+ ib_dealloc_device(&rxe->ib_dev);7676+}7777+7878+void rxe_dev_put(struct rxe_dev *rxe)7979+{8080+ kref_put(&rxe->ref_cnt, rxe_release);8181+}8282+EXPORT_SYMBOL_GPL(rxe_dev_put);8383+8484+/* initialize rxe device parameters */8585+static int rxe_init_device_param(struct rxe_dev *rxe)8686+{8787+ rxe->max_inline_data = RXE_MAX_INLINE_DATA;8888+8989+ rxe->attr.fw_ver = RXE_FW_VER;9090+ rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;9191+ rxe->attr.page_size_cap = 
RXE_PAGE_SIZE_CAP;9292+ rxe->attr.vendor_id = RXE_VENDOR_ID;9393+ rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID;9494+ rxe->attr.hw_ver = RXE_HW_VER;9595+ rxe->attr.max_qp = RXE_MAX_QP;9696+ rxe->attr.max_qp_wr = RXE_MAX_QP_WR;9797+ rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;9898+ rxe->attr.max_sge = RXE_MAX_SGE;9999+ rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;100100+ rxe->attr.max_cq = RXE_MAX_CQ;101101+ rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;102102+ rxe->attr.max_mr = RXE_MAX_MR;103103+ rxe->attr.max_pd = RXE_MAX_PD;104104+ rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;105105+ rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM;106106+ rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;107107+ rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;108108+ rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;109109+ rxe->attr.atomic_cap = RXE_ATOMIC_CAP;110110+ rxe->attr.max_ee = RXE_MAX_EE;111111+ rxe->attr.max_rdd = RXE_MAX_RDD;112112+ rxe->attr.max_mw = RXE_MAX_MW;113113+ rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP;114114+ rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP;115115+ rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP;116116+ rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH;117117+ rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH;118118+ rxe->attr.max_ah = RXE_MAX_AH;119119+ rxe->attr.max_fmr = RXE_MAX_FMR;120120+ rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR;121121+ rxe->attr.max_srq = RXE_MAX_SRQ;122122+ rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR;123123+ rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE;124124+ rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;125125+ rxe->attr.max_pkeys = RXE_MAX_PKEYS;126126+ rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;127127+128128+ rxe->max_ucontext = RXE_MAX_UCONTEXT;129129+130130+ return 0;131131+}132132+133133+/* initialize port attributes */134134+static int rxe_init_port_param(struct rxe_port *port)135135+{136136+ port->attr.state = RXE_PORT_STATE;137137+ 
port->attr.max_mtu = RXE_PORT_MAX_MTU;138138+ port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;139139+ port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;140140+ port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;141141+ port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;142142+ port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR;143143+ port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR;144144+ port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN;145145+ port->attr.lid = RXE_PORT_LID;146146+ port->attr.sm_lid = RXE_PORT_SM_LID;147147+ port->attr.lmc = RXE_PORT_LMC;148148+ port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM;149149+ port->attr.sm_sl = RXE_PORT_SM_SL;150150+ port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT;151151+ port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY;152152+ port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;153153+ port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;154154+ port->attr.phys_state = RXE_PORT_PHYS_STATE;155155+ port->mtu_cap =156156+ ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);157157+ port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);158158+159159+ return 0;160160+}161161+162162+/* initialize port state, note IB convention that HCA ports are always163163+ * numbered from 1164164+ */165165+static int rxe_init_ports(struct rxe_dev *rxe)166166+{167167+ struct rxe_port *port = &rxe->port;168168+169169+ rxe_init_port_param(port);170170+171171+ if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)172172+ return -EINVAL;173173+174174+ port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,175175+ sizeof(*port->pkey_tbl), GFP_KERNEL);176176+177177+ if (!port->pkey_tbl)178178+ return -ENOMEM;179179+180180+ port->pkey_tbl[0] = 0xffff;181181+ port->port_guid = rxe->ifc_ops->port_guid(rxe);182182+183183+ spin_lock_init(&port->port_lock);184184+185185+ return 0;186186+}187187+188188+/* init pools of managed objects */189189+static int rxe_init_pools(struct rxe_dev *rxe)190190+{191191+ int err;192192+193193+ err = rxe_pool_init(rxe, &rxe->uc_pool, 
RXE_TYPE_UC,194194+ rxe->max_ucontext);195195+ if (err)196196+ goto err1;197197+198198+ err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,199199+ rxe->attr.max_pd);200200+ if (err)201201+ goto err2;202202+203203+ err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,204204+ rxe->attr.max_ah);205205+ if (err)206206+ goto err3;207207+208208+ err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,209209+ rxe->attr.max_srq);210210+ if (err)211211+ goto err4;212212+213213+ err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,214214+ rxe->attr.max_qp);215215+ if (err)216216+ goto err5;217217+218218+ err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,219219+ rxe->attr.max_cq);220220+ if (err)221221+ goto err6;222222+223223+ err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,224224+ rxe->attr.max_mr);225225+ if (err)226226+ goto err7;227227+228228+ err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,229229+ rxe->attr.max_mw);230230+ if (err)231231+ goto err8;232232+233233+ err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,234234+ rxe->attr.max_mcast_grp);235235+ if (err)236236+ goto err9;237237+238238+ err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,239239+ rxe->attr.max_total_mcast_qp_attach);240240+ if (err)241241+ goto err10;242242+243243+ return 0;244244+245245+err10:246246+ rxe_pool_cleanup(&rxe->mc_grp_pool);247247+err9:248248+ rxe_pool_cleanup(&rxe->mw_pool);249249+err8:250250+ rxe_pool_cleanup(&rxe->mr_pool);251251+err7:252252+ rxe_pool_cleanup(&rxe->cq_pool);253253+err6:254254+ rxe_pool_cleanup(&rxe->qp_pool);255255+err5:256256+ rxe_pool_cleanup(&rxe->srq_pool);257257+err4:258258+ rxe_pool_cleanup(&rxe->ah_pool);259259+err3:260260+ rxe_pool_cleanup(&rxe->pd_pool);261261+err2:262262+ rxe_pool_cleanup(&rxe->uc_pool);263263+err1:264264+ return err;265265+}266266+267267+/* initialize rxe device state */268268+static int rxe_init(struct rxe_dev *rxe)269269+{270270+ int err;271271+272272+ /* init default device parameters */273273+ 
rxe_init_device_param(rxe);274274+275275+ err = rxe_init_ports(rxe);276276+ if (err)277277+ goto err1;278278+279279+ err = rxe_init_pools(rxe);280280+ if (err)281281+ goto err2;282282+283283+ /* init pending mmap list */284284+ spin_lock_init(&rxe->mmap_offset_lock);285285+ spin_lock_init(&rxe->pending_lock);286286+ INIT_LIST_HEAD(&rxe->pending_mmaps);287287+ INIT_LIST_HEAD(&rxe->list);288288+289289+ mutex_init(&rxe->usdev_lock);290290+291291+ return 0;292292+293293+err2:294294+ rxe_cleanup_ports(rxe);295295+err1:296296+ return err;297297+}298298+299299+int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)300300+{301301+ struct rxe_port *port = &rxe->port;302302+ enum ib_mtu mtu;303303+304304+ mtu = eth_mtu_int_to_enum(ndev_mtu);305305+306306+ /* Make sure that new MTU in range */307307+ mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;308308+309309+ port->attr.active_mtu = mtu;310310+ port->mtu_cap = ib_mtu_enum_to_int(mtu);311311+312312+ return 0;313313+}314314+EXPORT_SYMBOL(rxe_set_mtu);315315+316316+/* called by ifc layer to create new rxe device.317317+ * The caller should allocate memory for rxe by calling ib_alloc_device.318318+ */319319+int rxe_add(struct rxe_dev *rxe, unsigned int mtu)320320+{321321+ int err;322322+323323+ kref_init(&rxe->ref_cnt);324324+325325+ err = rxe_init(rxe);326326+ if (err)327327+ goto err1;328328+329329+ err = rxe_set_mtu(rxe, mtu);330330+ if (err)331331+ goto err1;332332+333333+ err = rxe_register_device(rxe);334334+ if (err)335335+ goto err1;336336+337337+ return 0;338338+339339+err1:340340+ rxe_dev_put(rxe);341341+ return err;342342+}343343+EXPORT_SYMBOL(rxe_add);344344+345345+/* called by the ifc layer to remove a device */346346+void rxe_remove(struct rxe_dev *rxe)347347+{348348+ rxe_unregister_device(rxe);349349+350350+ rxe_dev_put(rxe);351351+}352352+EXPORT_SYMBOL(rxe_remove);353353+354354+static int __init rxe_module_init(void)355355+{356356+ int err;357357+358358+ /* initialize slab caches for 
managed objects */359359+ err = rxe_cache_init();360360+ if (err) {361361+ pr_err("rxe: unable to init object pools\n");362362+ return err;363363+ }364364+365365+ err = rxe_net_init();366366+ if (err) {367367+ pr_err("rxe: unable to init\n");368368+ rxe_cache_exit();369369+ return err;370370+ }371371+ pr_info("rxe: loaded\n");372372+373373+ return 0;374374+}375375+376376+static void __exit rxe_module_exit(void)377377+{378378+ rxe_remove_all();379379+ rxe_net_exit();380380+ rxe_cache_exit();381381+382382+ pr_info("rxe: unloaded\n");383383+}384384+385385+module_init(rxe_module_init);386386+module_exit(rxe_module_exit);
+77
drivers/infiniband/sw/rxe/rxe.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_H3535+#define RXE_H3636+3737+#include <linux/module.h>3838+#include <linux/skbuff.h>3939+#include <linux/crc32.h>4040+4141+#include <rdma/ib_verbs.h>4242+#include <rdma/ib_user_verbs.h>4343+#include <rdma/ib_pack.h>4444+#include <rdma/ib_smi.h>4545+#include <rdma/ib_umem.h>4646+#include <rdma/ib_cache.h>4747+#include <rdma/ib_addr.h>4848+4949+#include "rxe_net.h"5050+#include "rxe_opcode.h"5151+#include "rxe_hdr.h"5252+#include "rxe_param.h"5353+#include "rxe_verbs.h"5454+5555+#define RXE_UVERBS_ABI_VERSION (1)5656+5757+#define IB_PHYS_STATE_LINK_UP (5)5858+#define IB_PHYS_STATE_LINK_DOWN (3)5959+6060+#define RXE_ROCE_V2_SPORT (0xc000)6161+6262+int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);6363+6464+int rxe_add(struct rxe_dev *rxe, unsigned int mtu);6565+void rxe_remove(struct rxe_dev *rxe);6666+void rxe_remove_all(void);6767+6868+int rxe_rcv(struct sk_buff *skb);6969+7070+void rxe_dev_put(struct rxe_dev *rxe);7171+struct rxe_dev *net_to_rxe(struct net_device *ndev);7272+struct rxe_dev *get_rxe_by_name(const char* name);7373+7474+void rxe_port_up(struct rxe_dev *rxe);7575+void rxe_port_down(struct rxe_dev *rxe);7676+7777+#endif /* RXE_H */
+98
drivers/infiniband/sw/rxe/rxe_av.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+3737+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)3838+{3939+ struct rxe_port *port;4040+4141+ if (attr->port_num != 1) {4242+ pr_info("rxe: invalid port_num = %d\n", attr->port_num);4343+ return -EINVAL;4444+ }4545+4646+ port = &rxe->port;4747+4848+ if (attr->ah_flags & IB_AH_GRH) {4949+ if (attr->grh.sgid_index > port->attr.gid_tbl_len) {5050+ pr_info("rxe: invalid sgid index = %d\n",5151+ attr->grh.sgid_index);5252+ return -EINVAL;5353+ }5454+ }5555+5656+ return 0;5757+}5858+5959+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,6060+ struct rxe_av *av, struct ib_ah_attr *attr)6161+{6262+ memset(av, 0, sizeof(*av));6363+ memcpy(&av->grh, &attr->grh, sizeof(attr->grh));6464+ av->port_num = port_num;6565+ return 0;6666+}6767+6868+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,6969+ struct ib_ah_attr *attr)7070+{7171+ memcpy(&attr->grh, &av->grh, sizeof(av->grh));7272+ attr->port_num = av->port_num;7373+ return 0;7474+}7575+7676+int rxe_av_fill_ip_info(struct rxe_dev *rxe,7777+ struct rxe_av *av,7878+ struct ib_ah_attr *attr,7979+ struct ib_gid_attr *sgid_attr,8080+ union ib_gid *sgid)8181+{8282+ rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);8383+ rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);8484+ av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);8585+8686+ return 0;8787+}8888+8989+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)9090+{9191+ if (!pkt || !pkt->qp)9292+ return NULL;9393+9494+ if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC)9595+ return &pkt->qp->pri_av;9696+9797+ return (pkt->wqe) ? &pkt->wqe->av : NULL;9898+}
+734
drivers/infiniband/sw/rxe/rxe_comp.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include <linux/skbuff.h>3535+3636+#include "rxe.h"3737+#include "rxe_loc.h"3838+#include "rxe_queue.h"3939+#include "rxe_task.h"4040+4141+enum comp_state {4242+ COMPST_GET_ACK,4343+ COMPST_GET_WQE,4444+ COMPST_COMP_WQE,4545+ COMPST_COMP_ACK,4646+ COMPST_CHECK_PSN,4747+ COMPST_CHECK_ACK,4848+ COMPST_READ,4949+ COMPST_ATOMIC,5050+ COMPST_WRITE_SEND,5151+ COMPST_UPDATE_COMP,5252+ COMPST_ERROR_RETRY,5353+ COMPST_RNR_RETRY,5454+ COMPST_ERROR,5555+ COMPST_EXIT, /* We have an issue, and we want to rerun the completer */5656+ COMPST_DONE, /* The completer finished successflly */5757+};5858+5959+static char *comp_state_name[] = {6060+ [COMPST_GET_ACK] = "GET ACK",6161+ [COMPST_GET_WQE] = "GET WQE",6262+ [COMPST_COMP_WQE] = "COMP WQE",6363+ [COMPST_COMP_ACK] = "COMP ACK",6464+ [COMPST_CHECK_PSN] = "CHECK PSN",6565+ [COMPST_CHECK_ACK] = "CHECK ACK",6666+ [COMPST_READ] = "READ",6767+ [COMPST_ATOMIC] = "ATOMIC",6868+ [COMPST_WRITE_SEND] = "WRITE/SEND",6969+ [COMPST_UPDATE_COMP] = "UPDATE COMP",7070+ [COMPST_ERROR_RETRY] = "ERROR RETRY",7171+ [COMPST_RNR_RETRY] = "RNR RETRY",7272+ [COMPST_ERROR] = "ERROR",7373+ [COMPST_EXIT] = "EXIT",7474+ [COMPST_DONE] = "DONE",7575+};7676+7777+static unsigned long rnrnak_usec[32] = {7878+ [IB_RNR_TIMER_655_36] = 655360,7979+ [IB_RNR_TIMER_000_01] = 10,8080+ [IB_RNR_TIMER_000_02] = 20,8181+ [IB_RNR_TIMER_000_03] = 30,8282+ [IB_RNR_TIMER_000_04] = 40,8383+ [IB_RNR_TIMER_000_06] = 60,8484+ [IB_RNR_TIMER_000_08] = 80,8585+ [IB_RNR_TIMER_000_12] = 120,8686+ [IB_RNR_TIMER_000_16] = 160,8787+ [IB_RNR_TIMER_000_24] = 240,8888+ [IB_RNR_TIMER_000_32] = 320,8989+ [IB_RNR_TIMER_000_48] = 480,9090+ [IB_RNR_TIMER_000_64] = 640,9191+ [IB_RNR_TIMER_000_96] = 
960,9292+ [IB_RNR_TIMER_001_28] = 1280,9393+ [IB_RNR_TIMER_001_92] = 1920,9494+ [IB_RNR_TIMER_002_56] = 2560,9595+ [IB_RNR_TIMER_003_84] = 3840,9696+ [IB_RNR_TIMER_005_12] = 5120,9797+ [IB_RNR_TIMER_007_68] = 7680,9898+ [IB_RNR_TIMER_010_24] = 10240,9999+ [IB_RNR_TIMER_015_36] = 15360,100100+ [IB_RNR_TIMER_020_48] = 20480,101101+ [IB_RNR_TIMER_030_72] = 30720,102102+ [IB_RNR_TIMER_040_96] = 40960,103103+ [IB_RNR_TIMER_061_44] = 61410,104104+ [IB_RNR_TIMER_081_92] = 81920,105105+ [IB_RNR_TIMER_122_88] = 122880,106106+ [IB_RNR_TIMER_163_84] = 163840,107107+ [IB_RNR_TIMER_245_76] = 245760,108108+ [IB_RNR_TIMER_327_68] = 327680,109109+ [IB_RNR_TIMER_491_52] = 491520,110110+};111111+112112+static inline unsigned long rnrnak_jiffies(u8 timeout)113113+{114114+ return max_t(unsigned long,115115+ usecs_to_jiffies(rnrnak_usec[timeout]), 1);116116+}117117+118118+static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)119119+{120120+ switch (opcode) {121121+ case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE;122122+ case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE;123123+ case IB_WR_SEND: return IB_WC_SEND;124124+ case IB_WR_SEND_WITH_IMM: return IB_WC_SEND;125125+ case IB_WR_RDMA_READ: return IB_WC_RDMA_READ;126126+ case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP;127127+ case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD;128128+ case IB_WR_LSO: return IB_WC_LSO;129129+ case IB_WR_SEND_WITH_INV: return IB_WC_SEND;130130+ case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ;131131+ case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;132132+ case IB_WR_REG_MR: return IB_WC_REG_MR;133133+134134+ default:135135+ return 0xff;136136+ }137137+}138138+139139+void retransmit_timer(unsigned long data)140140+{141141+ struct rxe_qp *qp = (struct rxe_qp *)data;142142+143143+ if (qp->valid) {144144+ qp->comp.timeout = 1;145145+ rxe_run_task(&qp->comp.task, 1);146146+ }147147+}148148+149149+void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,150150+ struct 
sk_buff *skb)151151+{152152+ int must_sched;153153+154154+ skb_queue_tail(&qp->resp_pkts, skb);155155+156156+ must_sched = skb_queue_len(&qp->resp_pkts) > 1;157157+ rxe_run_task(&qp->comp.task, must_sched);158158+}159159+160160+static inline enum comp_state get_wqe(struct rxe_qp *qp,161161+ struct rxe_pkt_info *pkt,162162+ struct rxe_send_wqe **wqe_p)163163+{164164+ struct rxe_send_wqe *wqe;165165+166166+ /* we come here whether or not we found a response packet to see if167167+ * there are any posted WQEs168168+ */169169+ wqe = queue_head(qp->sq.queue);170170+ *wqe_p = wqe;171171+172172+ /* no WQE or requester has not started it yet */173173+ if (!wqe || wqe->state == wqe_state_posted)174174+ return pkt ? COMPST_DONE : COMPST_EXIT;175175+176176+ /* WQE does not require an ack */177177+ if (wqe->state == wqe_state_done)178178+ return COMPST_COMP_WQE;179179+180180+ /* WQE caused an error */181181+ if (wqe->state == wqe_state_error)182182+ return COMPST_ERROR;183183+184184+ /* we have a WQE, if we also have an ack check its PSN */185185+ return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;186186+}187187+188188+static inline void reset_retry_counters(struct rxe_qp *qp)189189+{190190+ qp->comp.retry_cnt = qp->attr.retry_cnt;191191+ qp->comp.rnr_retry = qp->attr.rnr_retry;192192+}193193+194194+static inline enum comp_state check_psn(struct rxe_qp *qp,195195+ struct rxe_pkt_info *pkt,196196+ struct rxe_send_wqe *wqe)197197+{198198+ s32 diff;199199+200200+ /* check to see if response is past the oldest WQE. 
if it is, complete201201+ * send/write or error read/atomic202202+ */203203+ diff = psn_compare(pkt->psn, wqe->last_psn);204204+ if (diff > 0) {205205+ if (wqe->state == wqe_state_pending) {206206+ if (wqe->mask & WR_ATOMIC_OR_READ_MASK)207207+ return COMPST_ERROR_RETRY;208208+209209+ reset_retry_counters(qp);210210+ return COMPST_COMP_WQE;211211+ } else {212212+ return COMPST_DONE;213213+ }214214+ }215215+216216+ /* compare response packet to expected response */217217+ diff = psn_compare(pkt->psn, qp->comp.psn);218218+ if (diff < 0) {219219+ /* response is most likely a retried packet if it matches an220220+ * uncompleted WQE go complete it else ignore it221221+ */222222+ if (pkt->psn == wqe->last_psn)223223+ return COMPST_COMP_ACK;224224+ else225225+ return COMPST_DONE;226226+ } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {227227+ return COMPST_ERROR_RETRY;228228+ } else {229229+ return COMPST_CHECK_ACK;230230+ }231231+}232232+233233+static inline enum comp_state check_ack(struct rxe_qp *qp,234234+ struct rxe_pkt_info *pkt,235235+ struct rxe_send_wqe *wqe)236236+{237237+ unsigned int mask = pkt->mask;238238+ u8 syn;239239+240240+ /* Check the sequence only */241241+ switch (qp->comp.opcode) {242242+ case -1:243243+ /* Will catch all *_ONLY cases. */244244+ if (!(mask & RXE_START_MASK))245245+ return COMPST_ERROR;246246+247247+ break;248248+249249+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:250250+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:251251+ if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&252252+ pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {253253+ return COMPST_ERROR;254254+ }255255+ break;256256+ default:257257+ WARN_ON(1);258258+ }259259+260260+ /* Check operation validity. 
*/261261+ switch (pkt->opcode) {262262+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:263263+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:264264+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:265265+ syn = aeth_syn(pkt);266266+267267+ if ((syn & AETH_TYPE_MASK) != AETH_ACK)268268+ return COMPST_ERROR;269269+270270+ /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE271271+ * doesn't have an AETH)272272+ */273273+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:274274+ if (wqe->wr.opcode != IB_WR_RDMA_READ &&275275+ wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {276276+ return COMPST_ERROR;277277+ }278278+ reset_retry_counters(qp);279279+ return COMPST_READ;280280+281281+ case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:282282+ syn = aeth_syn(pkt);283283+284284+ if ((syn & AETH_TYPE_MASK) != AETH_ACK)285285+ return COMPST_ERROR;286286+287287+ if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&288288+ wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)289289+ return COMPST_ERROR;290290+ reset_retry_counters(qp);291291+ return COMPST_ATOMIC;292292+293293+ case IB_OPCODE_RC_ACKNOWLEDGE:294294+ syn = aeth_syn(pkt);295295+ switch (syn & AETH_TYPE_MASK) {296296+ case AETH_ACK:297297+ reset_retry_counters(qp);298298+ return COMPST_WRITE_SEND;299299+300300+ case AETH_RNR_NAK:301301+ return COMPST_RNR_RETRY;302302+303303+ case AETH_NAK:304304+ switch (syn) {305305+ case AETH_NAK_PSN_SEQ_ERROR:306306+ /* a nak implicitly acks all packets with psns307307+ * before308308+ */309309+ if (psn_compare(pkt->psn, qp->comp.psn) > 0) {310310+ qp->comp.psn = pkt->psn;311311+ if (qp->req.wait_psn) {312312+ qp->req.wait_psn = 0;313313+ rxe_run_task(&qp->req.task, 1);314314+ }315315+ }316316+ return COMPST_ERROR_RETRY;317317+318318+ case AETH_NAK_INVALID_REQ:319319+ wqe->status = IB_WC_REM_INV_REQ_ERR;320320+ return COMPST_ERROR;321321+322322+ case AETH_NAK_REM_ACC_ERR:323323+ wqe->status = IB_WC_REM_ACCESS_ERR;324324+ return COMPST_ERROR;325325+326326+ case AETH_NAK_REM_OP_ERR:327327+ wqe->status = 
IB_WC_REM_OP_ERR;328328+ return COMPST_ERROR;329329+330330+ default:331331+ pr_warn("unexpected nak %x\n", syn);332332+ wqe->status = IB_WC_REM_OP_ERR;333333+ return COMPST_ERROR;334334+ }335335+336336+ default:337337+ return COMPST_ERROR;338338+ }339339+ break;340340+341341+ default:342342+ pr_warn("unexpected opcode\n");343343+ }344344+345345+ return COMPST_ERROR;346346+}347347+348348+static inline enum comp_state do_read(struct rxe_qp *qp,349349+ struct rxe_pkt_info *pkt,350350+ struct rxe_send_wqe *wqe)351351+{352352+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);353353+ int ret;354354+355355+ ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,356356+ &wqe->dma, payload_addr(pkt),357357+ payload_size(pkt), to_mem_obj, NULL);358358+ if (ret)359359+ return COMPST_ERROR;360360+361361+ if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))362362+ return COMPST_COMP_ACK;363363+ else364364+ return COMPST_UPDATE_COMP;365365+}366366+367367+static inline enum comp_state do_atomic(struct rxe_qp *qp,368368+ struct rxe_pkt_info *pkt,369369+ struct rxe_send_wqe *wqe)370370+{371371+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);372372+ int ret;373373+374374+ u64 atomic_orig = atmack_orig(pkt);375375+376376+ ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,377377+ &wqe->dma, &atomic_orig,378378+ sizeof(u64), to_mem_obj, NULL);379379+ if (ret)380380+ return COMPST_ERROR;381381+ else382382+ return COMPST_COMP_ACK;383383+}384384+385385+static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,386386+ struct rxe_cqe *cqe)387387+{388388+ memset(cqe, 0, sizeof(*cqe));389389+390390+ if (!qp->is_user) {391391+ struct ib_wc *wc = &cqe->ibwc;392392+393393+ wc->wr_id = wqe->wr.wr_id;394394+ wc->status = wqe->status;395395+ wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);396396+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||397397+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)398398+ wc->wc_flags = IB_WC_WITH_IMM;399399+ wc->byte_len = wqe->dma.length;400400+ wc->qp = 
&qp->ibqp;401401+ } else {402402+ struct ib_uverbs_wc *uwc = &cqe->uibwc;403403+404404+ uwc->wr_id = wqe->wr.wr_id;405405+ uwc->status = wqe->status;406406+ uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);407407+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||408408+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)409409+ uwc->wc_flags = IB_WC_WITH_IMM;410410+ uwc->byte_len = wqe->dma.length;411411+ uwc->qp_num = qp->ibqp.qp_num;412412+ }413413+}414414+415415+static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)416416+{417417+ struct rxe_cqe cqe;418418+419419+ if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||420420+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||421421+ (qp->req.state == QP_STATE_ERROR)) {422422+ make_send_cqe(qp, wqe, &cqe);423423+ rxe_cq_post(qp->scq, &cqe, 0);424424+ }425425+426426+ advance_consumer(qp->sq.queue);427427+428428+ /*429429+ * we completed something so let req run again430430+ * if it is trying to fence431431+ */432432+ if (qp->req.wait_fence) {433433+ qp->req.wait_fence = 0;434434+ rxe_run_task(&qp->req.task, 1);435435+ }436436+}437437+438438+static inline enum comp_state complete_ack(struct rxe_qp *qp,439439+ struct rxe_pkt_info *pkt,440440+ struct rxe_send_wqe *wqe)441441+{442442+ unsigned long flags;443443+444444+ if (wqe->has_rd_atomic) {445445+ wqe->has_rd_atomic = 0;446446+ atomic_inc(&qp->req.rd_atomic);447447+ if (qp->req.need_rd_atomic) {448448+ qp->comp.timeout_retry = 0;449449+ qp->req.need_rd_atomic = 0;450450+ rxe_run_task(&qp->req.task, 1);451451+ }452452+ }453453+454454+ if (unlikely(qp->req.state == QP_STATE_DRAIN)) {455455+ /* state_lock used by requester & completer */456456+ spin_lock_irqsave(&qp->state_lock, flags);457457+ if ((qp->req.state == QP_STATE_DRAIN) &&458458+ (qp->comp.psn == qp->req.psn)) {459459+ qp->req.state = QP_STATE_DRAINED;460460+ spin_unlock_irqrestore(&qp->state_lock, flags);461461+462462+ if (qp->ibqp.event_handler) {463463+ struct ib_event ev;464464+465465+ ev.device = qp->ibqp.device;466466+ 
ev.element.qp = &qp->ibqp;467467+ ev.event = IB_EVENT_SQ_DRAINED;468468+ qp->ibqp.event_handler(&ev,469469+ qp->ibqp.qp_context);470470+ }471471+ } else {472472+ spin_unlock_irqrestore(&qp->state_lock, flags);473473+ }474474+ }475475+476476+ do_complete(qp, wqe);477477+478478+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)479479+ return COMPST_UPDATE_COMP;480480+ else481481+ return COMPST_DONE;482482+}483483+484484+static inline enum comp_state complete_wqe(struct rxe_qp *qp,485485+ struct rxe_pkt_info *pkt,486486+ struct rxe_send_wqe *wqe)487487+{488488+ qp->comp.opcode = -1;489489+490490+ if (pkt) {491491+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)492492+ qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;493493+494494+ if (qp->req.wait_psn) {495495+ qp->req.wait_psn = 0;496496+ rxe_run_task(&qp->req.task, 1);497497+ }498498+ }499499+500500+ do_complete(qp, wqe);501501+502502+ return COMPST_GET_WQE;503503+}504504+505505+int rxe_completer(void *arg)506506+{507507+ struct rxe_qp *qp = (struct rxe_qp *)arg;508508+ struct rxe_send_wqe *wqe = wqe;509509+ struct sk_buff *skb = NULL;510510+ struct rxe_pkt_info *pkt = NULL;511511+ enum comp_state state;512512+513513+ if (!qp->valid) {514514+ while ((skb = skb_dequeue(&qp->resp_pkts))) {515515+ rxe_drop_ref(qp);516516+ kfree_skb(skb);517517+ }518518+ skb = NULL;519519+ pkt = NULL;520520+521521+ while (queue_head(qp->sq.queue))522522+ advance_consumer(qp->sq.queue);523523+524524+ goto exit;525525+ }526526+527527+ if (qp->req.state == QP_STATE_ERROR) {528528+ while ((skb = skb_dequeue(&qp->resp_pkts))) {529529+ rxe_drop_ref(qp);530530+ kfree_skb(skb);531531+ }532532+ skb = NULL;533533+ pkt = NULL;534534+535535+ while ((wqe = queue_head(qp->sq.queue))) {536536+ wqe->status = IB_WC_WR_FLUSH_ERR;537537+ do_complete(qp, wqe);538538+ }539539+540540+ goto exit;541541+ }542542+543543+ if (qp->req.state == QP_STATE_RESET) {544544+ while ((skb = skb_dequeue(&qp->resp_pkts))) {545545+ rxe_drop_ref(qp);546546+ kfree_skb(skb);547547+ 
}548548+ skb = NULL;549549+ pkt = NULL;550550+551551+ while (queue_head(qp->sq.queue))552552+ advance_consumer(qp->sq.queue);553553+554554+ goto exit;555555+ }556556+557557+ if (qp->comp.timeout) {558558+ qp->comp.timeout_retry = 1;559559+ qp->comp.timeout = 0;560560+ } else {561561+ qp->comp.timeout_retry = 0;562562+ }563563+564564+ if (qp->req.need_retry)565565+ goto exit;566566+567567+ state = COMPST_GET_ACK;568568+569569+ while (1) {570570+ pr_debug("state = %s\n", comp_state_name[state]);571571+ switch (state) {572572+ case COMPST_GET_ACK:573573+ skb = skb_dequeue(&qp->resp_pkts);574574+ if (skb) {575575+ pkt = SKB_TO_PKT(skb);576576+ qp->comp.timeout_retry = 0;577577+ }578578+ state = COMPST_GET_WQE;579579+ break;580580+581581+ case COMPST_GET_WQE:582582+ state = get_wqe(qp, pkt, &wqe);583583+ break;584584+585585+ case COMPST_CHECK_PSN:586586+ state = check_psn(qp, pkt, wqe);587587+ break;588588+589589+ case COMPST_CHECK_ACK:590590+ state = check_ack(qp, pkt, wqe);591591+ break;592592+593593+ case COMPST_READ:594594+ state = do_read(qp, pkt, wqe);595595+ break;596596+597597+ case COMPST_ATOMIC:598598+ state = do_atomic(qp, pkt, wqe);599599+ break;600600+601601+ case COMPST_WRITE_SEND:602602+ if (wqe->state == wqe_state_pending &&603603+ wqe->last_psn == pkt->psn)604604+ state = COMPST_COMP_ACK;605605+ else606606+ state = COMPST_UPDATE_COMP;607607+ break;608608+609609+ case COMPST_COMP_ACK:610610+ state = complete_ack(qp, pkt, wqe);611611+ break;612612+613613+ case COMPST_COMP_WQE:614614+ state = complete_wqe(qp, pkt, wqe);615615+ break;616616+617617+ case COMPST_UPDATE_COMP:618618+ if (pkt->mask & RXE_END_MASK)619619+ qp->comp.opcode = -1;620620+ else621621+ qp->comp.opcode = pkt->opcode;622622+623623+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)624624+ qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;625625+626626+ if (qp->req.wait_psn) {627627+ qp->req.wait_psn = 0;628628+ rxe_run_task(&qp->req.task, 1);629629+ }630630+631631+ state = COMPST_DONE;632632+ 
break;633633+634634+ case COMPST_DONE:635635+ if (pkt) {636636+ rxe_drop_ref(pkt->qp);637637+ kfree_skb(skb);638638+ }639639+ goto done;640640+641641+ case COMPST_EXIT:642642+ if (qp->comp.timeout_retry && wqe) {643643+ state = COMPST_ERROR_RETRY;644644+ break;645645+ }646646+647647+ /* re reset the timeout counter if648648+ * (1) QP is type RC649649+ * (2) the QP is alive650650+ * (3) there is a packet sent by the requester that651651+ * might be acked (we still might get spurious652652+ * timeouts but try to keep them as few as possible)653653+ * (4) the timeout parameter is set654654+ */655655+ if ((qp_type(qp) == IB_QPT_RC) &&656656+ (qp->req.state == QP_STATE_READY) &&657657+ (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&658658+ qp->qp_timeout_jiffies)659659+ mod_timer(&qp->retrans_timer,660660+ jiffies + qp->qp_timeout_jiffies);661661+ goto exit;662662+663663+ case COMPST_ERROR_RETRY:664664+ /* we come here if the retry timer fired and we did665665+ * not receive a response packet. try to retry the send666666+ * queue if that makes sense and the limits have not667667+ * been exceeded. 
remember that some timeouts are668668+ * spurious since we do not reset the timer but kick669669+ * it down the road or let it expire670670+ */671671+672672+ /* there is nothing to retry in this case */673673+ if (!wqe || (wqe->state == wqe_state_posted))674674+ goto exit;675675+676676+ if (qp->comp.retry_cnt > 0) {677677+ if (qp->comp.retry_cnt != 7)678678+ qp->comp.retry_cnt--;679679+680680+ /* no point in retrying if we have already681681+ * seen the last ack that the requester could682682+ * have caused683683+ */684684+ if (psn_compare(qp->req.psn,685685+ qp->comp.psn) > 0) {686686+ /* tell the requester to retry the687687+ * send send queue next time around688688+ */689689+ qp->req.need_retry = 1;690690+ rxe_run_task(&qp->req.task, 1);691691+ }692692+ goto exit;693693+ } else {694694+ wqe->status = IB_WC_RETRY_EXC_ERR;695695+ state = COMPST_ERROR;696696+ }697697+ break;698698+699699+ case COMPST_RNR_RETRY:700700+ if (qp->comp.rnr_retry > 0) {701701+ if (qp->comp.rnr_retry != 7)702702+ qp->comp.rnr_retry--;703703+704704+ qp->req.need_retry = 1;705705+ pr_debug("set rnr nak timer\n");706706+ mod_timer(&qp->rnr_nak_timer,707707+ jiffies + rnrnak_jiffies(aeth_syn(pkt)708708+ & ~AETH_TYPE_MASK));709709+ goto exit;710710+ } else {711711+ wqe->status = IB_WC_RNR_RETRY_EXC_ERR;712712+ state = COMPST_ERROR;713713+ }714714+ break;715715+716716+ case COMPST_ERROR:717717+ do_complete(qp, wqe);718718+ rxe_qp_error(qp);719719+ goto exit;720720+ }721721+ }722722+723723+exit:724724+ /* we come here if we are done with processing and want the task to725725+ * exit from the loop calling us726726+ */727727+ return -EAGAIN;728728+729729+done:730730+ /* we come here if we have processed a packet we want the task to call731731+ * us again to see if there is anything else to do732732+ */733733+ return 0;734734+}
+165
drivers/infiniband/sw/rxe/rxe_cq.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+#include "rxe_queue.h"3737+3838+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,3939+ int cqe, int comp_vector, struct ib_udata *udata)4040+{4141+ int count;4242+4343+ if (cqe <= 0) {4444+ pr_warn("cqe(%d) <= 0\n", cqe);4545+ goto err1;4646+ }4747+4848+ if (cqe > rxe->attr.max_cqe) {4949+ pr_warn("cqe(%d) > max_cqe(%d)\n",5050+ cqe, rxe->attr.max_cqe);5151+ goto err1;5252+ }5353+5454+ if (cq) {5555+ count = queue_count(cq->queue);5656+ if (cqe < count) {5757+ pr_warn("cqe(%d) < current # elements in queue (%d)",5858+ cqe, count);5959+ goto err1;6060+ }6161+ }6262+6363+ return 0;6464+6565+err1:6666+ return -EINVAL;6767+}6868+6969+static void rxe_send_complete(unsigned long data)7070+{7171+ struct rxe_cq *cq = (struct rxe_cq *)data;7272+7373+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);7474+}7575+7676+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,7777+ int comp_vector, struct ib_ucontext *context,7878+ struct ib_udata *udata)7979+{8080+ int err;8181+8282+ cq->queue = rxe_queue_init(rxe, &cqe,8383+ sizeof(struct rxe_cqe));8484+ if (!cq->queue) {8585+ pr_warn("unable to create cq\n");8686+ return -ENOMEM;8787+ }8888+8989+ err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,9090+ cq->queue->buf_size, &cq->queue->ip);9191+ if (err) {9292+ kvfree(cq->queue->buf);9393+ kfree(cq->queue);9494+ return err;9595+ }9696+9797+ if (udata)9898+ cq->is_user = 1;9999+100100+ tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);101101+102102+ spin_lock_init(&cq->cq_lock);103103+ cq->ibcq.cqe = cqe;104104+ return 0;105105+}106106+107107+int rxe_cq_resize_queue(struct rxe_cq 
*cq, int cqe, struct ib_udata *udata)108108+{109109+ int err;110110+111111+ err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,112112+ sizeof(struct rxe_cqe),113113+ cq->queue->ip ? cq->queue->ip->context : NULL,114114+ udata, NULL, &cq->cq_lock);115115+ if (!err)116116+ cq->ibcq.cqe = cqe;117117+118118+ return err;119119+}120120+121121+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)122122+{123123+ struct ib_event ev;124124+ unsigned long flags;125125+126126+ spin_lock_irqsave(&cq->cq_lock, flags);127127+128128+ if (unlikely(queue_full(cq->queue))) {129129+ spin_unlock_irqrestore(&cq->cq_lock, flags);130130+ if (cq->ibcq.event_handler) {131131+ ev.device = cq->ibcq.device;132132+ ev.element.cq = &cq->ibcq;133133+ ev.event = IB_EVENT_CQ_ERR;134134+ cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);135135+ }136136+137137+ return -EBUSY;138138+ }139139+140140+ memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));141141+142142+ /* make sure all changes to the CQ are written before we update the143143+ * producer pointer144144+ */145145+ smp_wmb();146146+147147+ advance_producer(cq->queue);148148+ spin_unlock_irqrestore(&cq->cq_lock, flags);149149+150150+ if ((cq->notify == IB_CQ_NEXT_COMP) ||151151+ (cq->notify == IB_CQ_SOLICITED && solicited)) {152152+ cq->notify = 0;153153+ tasklet_schedule(&cq->comp_task);154154+ }155155+156156+ return 0;157157+}158158+159159+void rxe_cq_cleanup(void *arg)160160+{161161+ struct rxe_cq *cq = arg;162162+163163+ if (cq->queue)164164+ rxe_queue_cleanup(cq->queue);165165+}
+166
drivers/infiniband/sw/rxe/rxe_dma.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+3737+#define DMA_BAD_ADDER ((u64)0)3838+3939+static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)4040+{4141+ return dma_addr == DMA_BAD_ADDER;4242+}4343+4444+static u64 rxe_dma_map_single(struct ib_device *dev,4545+ void *cpu_addr, size_t size,4646+ enum dma_data_direction direction)4747+{4848+ WARN_ON(!valid_dma_direction(direction));4949+ return (uintptr_t)cpu_addr;5050+}5151+5252+static void rxe_dma_unmap_single(struct ib_device *dev,5353+ u64 addr, size_t size,5454+ enum dma_data_direction direction)5555+{5656+ WARN_ON(!valid_dma_direction(direction));5757+}5858+5959+static u64 rxe_dma_map_page(struct ib_device *dev,6060+ struct page *page,6161+ unsigned long offset,6262+ size_t size, enum dma_data_direction direction)6363+{6464+ u64 addr;6565+6666+ WARN_ON(!valid_dma_direction(direction));6767+6868+ if (offset + size > PAGE_SIZE) {6969+ addr = DMA_BAD_ADDER;7070+ goto done;7171+ }7272+7373+ addr = (uintptr_t)page_address(page);7474+ if (addr)7575+ addr += offset;7676+7777+done:7878+ return addr;7979+}8080+8181+static void rxe_dma_unmap_page(struct ib_device *dev,8282+ u64 addr, size_t size,8383+ enum dma_data_direction direction)8484+{8585+ WARN_ON(!valid_dma_direction(direction));8686+}8787+8888+static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,8989+ int nents, enum dma_data_direction direction)9090+{9191+ struct scatterlist *sg;9292+ u64 addr;9393+ int i;9494+ int ret = nents;9595+9696+ WARN_ON(!valid_dma_direction(direction));9797+9898+ for_each_sg(sgl, sg, nents, i) {9999+ addr = (uintptr_t)page_address(sg_page(sg));100100+ if (!addr) {101101+ ret = 0;102102+ break;103103+ }104104+ 
sg->dma_address = addr + sg->offset;105105+#ifdef CONFIG_NEED_SG_DMA_LENGTH106106+ sg->dma_length = sg->length;107107+#endif108108+ }109109+110110+ return ret;111111+}112112+113113+static void rxe_unmap_sg(struct ib_device *dev,114114+ struct scatterlist *sg, int nents,115115+ enum dma_data_direction direction)116116+{117117+ WARN_ON(!valid_dma_direction(direction));118118+}119119+120120+static void rxe_sync_single_for_cpu(struct ib_device *dev,121121+ u64 addr,122122+ size_t size, enum dma_data_direction dir)123123+{124124+}125125+126126+static void rxe_sync_single_for_device(struct ib_device *dev,127127+ u64 addr,128128+ size_t size, enum dma_data_direction dir)129129+{130130+}131131+132132+static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,133133+ u64 *dma_handle, gfp_t flag)134134+{135135+ struct page *p;136136+ void *addr = NULL;137137+138138+ p = alloc_pages(flag, get_order(size));139139+ if (p)140140+ addr = page_address(p);141141+142142+ if (dma_handle)143143+ *dma_handle = (uintptr_t)addr;144144+145145+ return addr;146146+}147147+148148+static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,149149+ void *cpu_addr, u64 dma_handle)150150+{151151+ free_pages((unsigned long)cpu_addr, get_order(size));152152+}153153+154154+struct ib_dma_mapping_ops rxe_dma_mapping_ops = {155155+ .mapping_error = rxe_mapping_error,156156+ .map_single = rxe_dma_map_single,157157+ .unmap_single = rxe_dma_unmap_single,158158+ .map_page = rxe_dma_map_page,159159+ .unmap_page = rxe_dma_unmap_page,160160+ .map_sg = rxe_map_sg,161161+ .unmap_sg = rxe_unmap_sg,162162+ .sync_single_for_cpu = rxe_sync_single_for_cpu,163163+ .sync_single_for_device = rxe_sync_single_for_device,164164+ .alloc_coherent = rxe_dma_alloc_coherent,165165+ .free_coherent = rxe_dma_free_coherent166166+};
+952
drivers/infiniband/sw/rxe/rxe_hdr.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_HDR_H3535+#define RXE_HDR_H3636+3737+/* extracted information about a packet carried in an sk_buff struct fits in3838+ * the skbuff cb array. Must be at most 48 bytes. 
stored in control block of3939+ * sk_buff for received packets.4040+ */4141+struct rxe_pkt_info {4242+ struct rxe_dev *rxe; /* device that owns packet */4343+ struct rxe_qp *qp; /* qp that owns packet */4444+ struct rxe_send_wqe *wqe; /* send wqe */4545+ u8 *hdr; /* points to bth */4646+ u32 mask; /* useful info about pkt */4747+ u32 psn; /* bth psn of packet */4848+ u16 pkey_index; /* partition of pkt */4949+ u16 paylen; /* length of bth - icrc */5050+ u8 port_num; /* port pkt received on */5151+ u8 opcode; /* bth opcode of packet */5252+ u8 offset; /* bth offset from pkt->hdr */5353+};5454+5555+/* Macros should be used only for received skb */5656+#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)5757+#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)5858+5959+/*6060+ * IBA header types and methods6161+ *6262+ * Some of these are for reference and completeness only since6363+ * rxe does not currently support RD transport6464+ * most of this could be moved into IB core. ib_pack.h has6565+ * part of this but is incomplete6666+ *6767+ * Header specific routines to insert/extract values to/from headers6868+ * the routines that are named __hhh_(set_)fff() take a pointer to a6969+ * hhh header and get(set) the fff field. 
The routines named7070+ * hhh_(set_)fff take a packet info struct and find the7171+ * header and field based on the opcode in the packet.7272+ * Conversion to/from network byte order from cpu order is also done.7373+ */7474+7575+#define RXE_ICRC_SIZE (4)7676+#define RXE_MAX_HDR_LENGTH (80)7777+7878+/******************************************************************************7979+ * Base Transport Header8080+ ******************************************************************************/8181+struct rxe_bth {8282+ u8 opcode;8383+ u8 flags;8484+ __be16 pkey;8585+ __be32 qpn;8686+ __be32 apsn;8787+};8888+8989+#define BTH_TVER (0)9090+#define BTH_DEF_PKEY (0xffff)9191+9292+#define BTH_SE_MASK (0x80)9393+#define BTH_MIG_MASK (0x40)9494+#define BTH_PAD_MASK (0x30)9595+#define BTH_TVER_MASK (0x0f)9696+#define BTH_FECN_MASK (0x80000000)9797+#define BTH_BECN_MASK (0x40000000)9898+#define BTH_RESV6A_MASK (0x3f000000)9999+#define BTH_QPN_MASK (0x00ffffff)100100+#define BTH_ACK_MASK (0x80000000)101101+#define BTH_RESV7_MASK (0x7f000000)102102+#define BTH_PSN_MASK (0x00ffffff)103103+104104+static inline u8 __bth_opcode(void *arg)105105+{106106+ struct rxe_bth *bth = arg;107107+108108+ return bth->opcode;109109+}110110+111111+static inline void __bth_set_opcode(void *arg, u8 opcode)112112+{113113+ struct rxe_bth *bth = arg;114114+115115+ bth->opcode = opcode;116116+}117117+118118+static inline u8 __bth_se(void *arg)119119+{120120+ struct rxe_bth *bth = arg;121121+122122+ return 0 != (BTH_SE_MASK & bth->flags);123123+}124124+125125+static inline void __bth_set_se(void *arg, int se)126126+{127127+ struct rxe_bth *bth = arg;128128+129129+ if (se)130130+ bth->flags |= BTH_SE_MASK;131131+ else132132+ bth->flags &= ~BTH_SE_MASK;133133+}134134+135135+static inline u8 __bth_mig(void *arg)136136+{137137+ struct rxe_bth *bth = arg;138138+139139+ return 0 != (BTH_MIG_MASK & bth->flags);140140+}141141+142142+static inline void __bth_set_mig(void *arg, u8 mig)143143+{144144+ struct rxe_bth 
*bth = arg;145145+146146+ if (mig)147147+ bth->flags |= BTH_MIG_MASK;148148+ else149149+ bth->flags &= ~BTH_MIG_MASK;150150+}151151+152152+static inline u8 __bth_pad(void *arg)153153+{154154+ struct rxe_bth *bth = arg;155155+156156+ return (BTH_PAD_MASK & bth->flags) >> 4;157157+}158158+159159+static inline void __bth_set_pad(void *arg, u8 pad)160160+{161161+ struct rxe_bth *bth = arg;162162+163163+ bth->flags = (BTH_PAD_MASK & (pad << 4)) |164164+ (~BTH_PAD_MASK & bth->flags);165165+}166166+167167+static inline u8 __bth_tver(void *arg)168168+{169169+ struct rxe_bth *bth = arg;170170+171171+ return BTH_TVER_MASK & bth->flags;172172+}173173+174174+static inline void __bth_set_tver(void *arg, u8 tver)175175+{176176+ struct rxe_bth *bth = arg;177177+178178+ bth->flags = (BTH_TVER_MASK & tver) |179179+ (~BTH_TVER_MASK & bth->flags);180180+}181181+182182+static inline u16 __bth_pkey(void *arg)183183+{184184+ struct rxe_bth *bth = arg;185185+186186+ return be16_to_cpu(bth->pkey);187187+}188188+189189+static inline void __bth_set_pkey(void *arg, u16 pkey)190190+{191191+ struct rxe_bth *bth = arg;192192+193193+ bth->pkey = cpu_to_be16(pkey);194194+}195195+196196+static inline u32 __bth_qpn(void *arg)197197+{198198+ struct rxe_bth *bth = arg;199199+200200+ return BTH_QPN_MASK & be32_to_cpu(bth->qpn);201201+}202202+203203+static inline void __bth_set_qpn(void *arg, u32 qpn)204204+{205205+ struct rxe_bth *bth = arg;206206+ u32 resvqpn = be32_to_cpu(bth->qpn);207207+208208+ bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) |209209+ (~BTH_QPN_MASK & resvqpn));210210+}211211+212212+static inline int __bth_fecn(void *arg)213213+{214214+ struct rxe_bth *bth = arg;215215+216216+ return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn);217217+}218218+219219+static inline void __bth_set_fecn(void *arg, int fecn)220220+{221221+ struct rxe_bth *bth = arg;222222+223223+ if (fecn)224224+ bth->qpn |= cpu_to_be32(BTH_FECN_MASK);225225+ else226226+ bth->qpn &= 
~cpu_to_be32(BTH_FECN_MASK);227227+}228228+229229+static inline int __bth_becn(void *arg)230230+{231231+ struct rxe_bth *bth = arg;232232+233233+ return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn);234234+}235235+236236+static inline void __bth_set_becn(void *arg, int becn)237237+{238238+ struct rxe_bth *bth = arg;239239+240240+ if (becn)241241+ bth->qpn |= cpu_to_be32(BTH_BECN_MASK);242242+ else243243+ bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK);244244+}245245+246246+static inline u8 __bth_resv6a(void *arg)247247+{248248+ struct rxe_bth *bth = arg;249249+250250+ return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24;251251+}252252+253253+static inline void __bth_set_resv6a(void *arg)254254+{255255+ struct rxe_bth *bth = arg;256256+257257+ bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK);258258+}259259+260260+static inline int __bth_ack(void *arg)261261+{262262+ struct rxe_bth *bth = arg;263263+264264+ return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn);265265+}266266+267267+static inline void __bth_set_ack(void *arg, int ack)268268+{269269+ struct rxe_bth *bth = arg;270270+271271+ if (ack)272272+ bth->apsn |= cpu_to_be32(BTH_ACK_MASK);273273+ else274274+ bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK);275275+}276276+277277+static inline void __bth_set_resv7(void *arg)278278+{279279+ struct rxe_bth *bth = arg;280280+281281+ bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK);282282+}283283+284284+static inline u32 __bth_psn(void *arg)285285+{286286+ struct rxe_bth *bth = arg;287287+288288+ return BTH_PSN_MASK & be32_to_cpu(bth->apsn);289289+}290290+291291+static inline void __bth_set_psn(void *arg, u32 psn)292292+{293293+ struct rxe_bth *bth = arg;294294+ u32 apsn = be32_to_cpu(bth->apsn);295295+296296+ bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) |297297+ (~BTH_PSN_MASK & apsn));298298+}299299+300300+static inline u8 bth_opcode(struct rxe_pkt_info *pkt)301301+{302302+ return __bth_opcode(pkt->hdr + pkt->offset);303303+}304304+305305+static inline void bth_set_opcode(struct rxe_pkt_info *pkt, 
u8 opcode)306306+{307307+ __bth_set_opcode(pkt->hdr + pkt->offset, opcode);308308+}309309+310310+static inline u8 bth_se(struct rxe_pkt_info *pkt)311311+{312312+ return __bth_se(pkt->hdr + pkt->offset);313313+}314314+315315+static inline void bth_set_se(struct rxe_pkt_info *pkt, int se)316316+{317317+ __bth_set_se(pkt->hdr + pkt->offset, se);318318+}319319+320320+static inline u8 bth_mig(struct rxe_pkt_info *pkt)321321+{322322+ return __bth_mig(pkt->hdr + pkt->offset);323323+}324324+325325+static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig)326326+{327327+ __bth_set_mig(pkt->hdr + pkt->offset, mig);328328+}329329+330330+static inline u8 bth_pad(struct rxe_pkt_info *pkt)331331+{332332+ return __bth_pad(pkt->hdr + pkt->offset);333333+}334334+335335+static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad)336336+{337337+ __bth_set_pad(pkt->hdr + pkt->offset, pad);338338+}339339+340340+static inline u8 bth_tver(struct rxe_pkt_info *pkt)341341+{342342+ return __bth_tver(pkt->hdr + pkt->offset);343343+}344344+345345+static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver)346346+{347347+ __bth_set_tver(pkt->hdr + pkt->offset, tver);348348+}349349+350350+static inline u16 bth_pkey(struct rxe_pkt_info *pkt)351351+{352352+ return __bth_pkey(pkt->hdr + pkt->offset);353353+}354354+355355+static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey)356356+{357357+ __bth_set_pkey(pkt->hdr + pkt->offset, pkey);358358+}359359+360360+static inline u32 bth_qpn(struct rxe_pkt_info *pkt)361361+{362362+ return __bth_qpn(pkt->hdr + pkt->offset);363363+}364364+365365+static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn)366366+{367367+ __bth_set_qpn(pkt->hdr + pkt->offset, qpn);368368+}369369+370370+static inline int bth_fecn(struct rxe_pkt_info *pkt)371371+{372372+ return __bth_fecn(pkt->hdr + pkt->offset);373373+}374374+375375+static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn)376376+{377377+ __bth_set_fecn(pkt->hdr + 
pkt->offset, fecn);378378+}379379+380380+static inline int bth_becn(struct rxe_pkt_info *pkt)381381+{382382+ return __bth_becn(pkt->hdr + pkt->offset);383383+}384384+385385+static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn)386386+{387387+ __bth_set_becn(pkt->hdr + pkt->offset, becn);388388+}389389+390390+static inline u8 bth_resv6a(struct rxe_pkt_info *pkt)391391+{392392+ return __bth_resv6a(pkt->hdr + pkt->offset);393393+}394394+395395+static inline void bth_set_resv6a(struct rxe_pkt_info *pkt)396396+{397397+ __bth_set_resv6a(pkt->hdr + pkt->offset);398398+}399399+400400+static inline int bth_ack(struct rxe_pkt_info *pkt)401401+{402402+ return __bth_ack(pkt->hdr + pkt->offset);403403+}404404+405405+static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack)406406+{407407+ __bth_set_ack(pkt->hdr + pkt->offset, ack);408408+}409409+410410+static inline void bth_set_resv7(struct rxe_pkt_info *pkt)411411+{412412+ __bth_set_resv7(pkt->hdr + pkt->offset);413413+}414414+415415+static inline u32 bth_psn(struct rxe_pkt_info *pkt)416416+{417417+ return __bth_psn(pkt->hdr + pkt->offset);418418+}419419+420420+static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn)421421+{422422+ __bth_set_psn(pkt->hdr + pkt->offset, psn);423423+}424424+425425+static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se,426426+ int mig, int pad, u16 pkey, u32 qpn, int ack_req,427427+ u32 psn)428428+{429429+ struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset);430430+431431+ bth->opcode = opcode;432432+ bth->flags = (pad << 4) & BTH_PAD_MASK;433433+ if (se)434434+ bth->flags |= BTH_SE_MASK;435435+ if (mig)436436+ bth->flags |= BTH_MIG_MASK;437437+ bth->pkey = cpu_to_be16(pkey);438438+ bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK);439439+ psn &= BTH_PSN_MASK;440440+ if (ack_req)441441+ psn |= BTH_ACK_MASK;442442+ bth->apsn = 
cpu_to_be32(psn);443443+}444444+445445+/******************************************************************************446446+ * Reliable Datagram Extended Transport Header447447+ ******************************************************************************/448448+struct rxe_rdeth {449449+ __be32 een;450450+};451451+452452+#define RDETH_EEN_MASK (0x00ffffff)453453+454454+static inline u8 __rdeth_een(void *arg)455455+{456456+ struct rxe_rdeth *rdeth = arg;457457+458458+ return RDETH_EEN_MASK & be32_to_cpu(rdeth->een);459459+}460460+461461+static inline void __rdeth_set_een(void *arg, u32 een)462462+{463463+ struct rxe_rdeth *rdeth = arg;464464+465465+ rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een);466466+}467467+468468+static inline u8 rdeth_een(struct rxe_pkt_info *pkt)469469+{470470+ return __rdeth_een(pkt->hdr + pkt->offset471471+ + rxe_opcode[pkt->opcode].offset[RXE_RDETH]);472472+}473473+474474+static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een)475475+{476476+ __rdeth_set_een(pkt->hdr + pkt->offset477477+ + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een);478478+}479479+480480+/******************************************************************************481481+ * Datagram Extended Transport Header482482+ ******************************************************************************/483483+struct rxe_deth {484484+ __be32 qkey;485485+ __be32 sqp;486486+};487487+488488+#define GSI_QKEY (0x80010000)489489+#define DETH_SQP_MASK (0x00ffffff)490490+491491+static inline u32 __deth_qkey(void *arg)492492+{493493+ struct rxe_deth *deth = arg;494494+495495+ return be32_to_cpu(deth->qkey);496496+}497497+498498+static inline void __deth_set_qkey(void *arg, u32 qkey)499499+{500500+ struct rxe_deth *deth = arg;501501+502502+ deth->qkey = cpu_to_be32(qkey);503503+}504504+505505+static inline u32 __deth_sqp(void *arg)506506+{507507+ struct rxe_deth *deth = arg;508508+509509+ return DETH_SQP_MASK & be32_to_cpu(deth->sqp);510510+}511511+512512+static inline void 
__deth_set_sqp(void *arg, u32 sqp)513513+{514514+ struct rxe_deth *deth = arg;515515+516516+ deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp);517517+}518518+519519+static inline u32 deth_qkey(struct rxe_pkt_info *pkt)520520+{521521+ return __deth_qkey(pkt->hdr + pkt->offset522522+ + rxe_opcode[pkt->opcode].offset[RXE_DETH]);523523+}524524+525525+static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey)526526+{527527+ __deth_set_qkey(pkt->hdr + pkt->offset528528+ + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey);529529+}530530+531531+static inline u32 deth_sqp(struct rxe_pkt_info *pkt)532532+{533533+ return __deth_sqp(pkt->hdr + pkt->offset534534+ + rxe_opcode[pkt->opcode].offset[RXE_DETH]);535535+}536536+537537+static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp)538538+{539539+ __deth_set_sqp(pkt->hdr + pkt->offset540540+ + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp);541541+}542542+543543+/******************************************************************************544544+ * RDMA Extended Transport Header545545+ ******************************************************************************/546546+struct rxe_reth {547547+ __be64 va;548548+ __be32 rkey;549549+ __be32 len;550550+};551551+552552+static inline u64 __reth_va(void *arg)553553+{554554+ struct rxe_reth *reth = arg;555555+556556+ return be64_to_cpu(reth->va);557557+}558558+559559+static inline void __reth_set_va(void *arg, u64 va)560560+{561561+ struct rxe_reth *reth = arg;562562+563563+ reth->va = cpu_to_be64(va);564564+}565565+566566+static inline u32 __reth_rkey(void *arg)567567+{568568+ struct rxe_reth *reth = arg;569569+570570+ return be32_to_cpu(reth->rkey);571571+}572572+573573+static inline void __reth_set_rkey(void *arg, u32 rkey)574574+{575575+ struct rxe_reth *reth = arg;576576+577577+ reth->rkey = cpu_to_be32(rkey);578578+}579579+580580+static inline u32 __reth_len(void *arg)581581+{582582+ struct rxe_reth *reth = arg;583583+584584+ return 
be32_to_cpu(reth->len);585585+}586586+587587+static inline void __reth_set_len(void *arg, u32 len)588588+{589589+ struct rxe_reth *reth = arg;590590+591591+ reth->len = cpu_to_be32(len);592592+}593593+594594+static inline u64 reth_va(struct rxe_pkt_info *pkt)595595+{596596+ return __reth_va(pkt->hdr + pkt->offset597597+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);598598+}599599+600600+static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va)601601+{602602+ __reth_set_va(pkt->hdr + pkt->offset603603+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], va);604604+}605605+606606+static inline u32 reth_rkey(struct rxe_pkt_info *pkt)607607+{608608+ return __reth_rkey(pkt->hdr + pkt->offset609609+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);610610+}611611+612612+static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)613613+{614614+ __reth_set_rkey(pkt->hdr + pkt->offset615615+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey);616616+}617617+618618+static inline u32 reth_len(struct rxe_pkt_info *pkt)619619+{620620+ return __reth_len(pkt->hdr + pkt->offset621621+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);622622+}623623+624624+static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)625625+{626626+ __reth_set_len(pkt->hdr + pkt->offset627627+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], len);628628+}629629+630630+/******************************************************************************631631+ * Atomic Extended Transport Header632632+ ******************************************************************************/633633+struct rxe_atmeth {634634+ __be64 va;635635+ __be32 rkey;636636+ __be64 swap_add;637637+ __be64 comp;638638+} __attribute__((__packed__));639639+640640+static inline u64 __atmeth_va(void *arg)641641+{642642+ struct rxe_atmeth *atmeth = arg;643643+644644+ return be64_to_cpu(atmeth->va);645645+}646646+647647+static inline void __atmeth_set_va(void *arg, u64 va)648648+{649649+ struct rxe_atmeth *atmeth = arg;650650+651651+ atmeth->va = 
cpu_to_be64(va);652652+}653653+654654+static inline u32 __atmeth_rkey(void *arg)655655+{656656+ struct rxe_atmeth *atmeth = arg;657657+658658+ return be32_to_cpu(atmeth->rkey);659659+}660660+661661+static inline void __atmeth_set_rkey(void *arg, u32 rkey)662662+{663663+ struct rxe_atmeth *atmeth = arg;664664+665665+ atmeth->rkey = cpu_to_be32(rkey);666666+}667667+668668+static inline u64 __atmeth_swap_add(void *arg)669669+{670670+ struct rxe_atmeth *atmeth = arg;671671+672672+ return be64_to_cpu(atmeth->swap_add);673673+}674674+675675+static inline void __atmeth_set_swap_add(void *arg, u64 swap_add)676676+{677677+ struct rxe_atmeth *atmeth = arg;678678+679679+ atmeth->swap_add = cpu_to_be64(swap_add);680680+}681681+682682+static inline u64 __atmeth_comp(void *arg)683683+{684684+ struct rxe_atmeth *atmeth = arg;685685+686686+ return be64_to_cpu(atmeth->comp);687687+}688688+689689+static inline void __atmeth_set_comp(void *arg, u64 comp)690690+{691691+ struct rxe_atmeth *atmeth = arg;692692+693693+ atmeth->comp = cpu_to_be64(comp);694694+}695695+696696+static inline u64 atmeth_va(struct rxe_pkt_info *pkt)697697+{698698+ return __atmeth_va(pkt->hdr + pkt->offset699699+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);700700+}701701+702702+static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va)703703+{704704+ __atmeth_set_va(pkt->hdr + pkt->offset705705+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va);706706+}707707+708708+static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt)709709+{710710+ return __atmeth_rkey(pkt->hdr + pkt->offset711711+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);712712+}713713+714714+static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)715715+{716716+ __atmeth_set_rkey(pkt->hdr + pkt->offset717717+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey);718718+}719719+720720+static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt)721721+{722722+ return __atmeth_swap_add(pkt->hdr + pkt->offset723723+ + 
rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);724724+}725725+726726+static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add)727727+{728728+ __atmeth_set_swap_add(pkt->hdr + pkt->offset729729+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add);730730+}731731+732732+static inline u64 atmeth_comp(struct rxe_pkt_info *pkt)733733+{734734+ return __atmeth_comp(pkt->hdr + pkt->offset735735+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);736736+}737737+738738+static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp)739739+{740740+ __atmeth_set_comp(pkt->hdr + pkt->offset741741+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp);742742+}743743+744744+/******************************************************************************745745+ * Ack Extended Transport Header746746+ ******************************************************************************/747747+struct rxe_aeth {748748+ __be32 smsn;749749+};750750+751751+#define AETH_SYN_MASK (0xff000000)752752+#define AETH_MSN_MASK (0x00ffffff)753753+754754+enum aeth_syndrome {755755+ AETH_TYPE_MASK = 0xe0,756756+ AETH_ACK = 0x00,757757+ AETH_RNR_NAK = 0x20,758758+ AETH_RSVD = 0x40,759759+ AETH_NAK = 0x60,760760+ AETH_ACK_UNLIMITED = 0x1f,761761+ AETH_NAK_PSN_SEQ_ERROR = 0x60,762762+ AETH_NAK_INVALID_REQ = 0x61,763763+ AETH_NAK_REM_ACC_ERR = 0x62,764764+ AETH_NAK_REM_OP_ERR = 0x63,765765+ AETH_NAK_INV_RD_REQ = 0x64,766766+};767767+768768+static inline u8 __aeth_syn(void *arg)769769+{770770+ struct rxe_aeth *aeth = arg;771771+772772+ return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24;773773+}774774+775775+static inline void __aeth_set_syn(void *arg, u8 syn)776776+{777777+ struct rxe_aeth *aeth = arg;778778+ u32 smsn = be32_to_cpu(aeth->smsn);779779+780780+ aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) |781781+ (~AETH_SYN_MASK & smsn));782782+}783783+784784+static inline u32 __aeth_msn(void *arg)785785+{786786+ struct rxe_aeth *aeth = arg;787787+788788+ return AETH_MSN_MASK & 
be32_to_cpu(aeth->smsn);789789+}790790+791791+static inline void __aeth_set_msn(void *arg, u32 msn)792792+{793793+ struct rxe_aeth *aeth = arg;794794+ u32 smsn = be32_to_cpu(aeth->smsn);795795+796796+ aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) |797797+ (~AETH_MSN_MASK & smsn));798798+}799799+800800+static inline u8 aeth_syn(struct rxe_pkt_info *pkt)801801+{802802+ return __aeth_syn(pkt->hdr + pkt->offset803803+ + rxe_opcode[pkt->opcode].offset[RXE_AETH]);804804+}805805+806806+static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)807807+{808808+ __aeth_set_syn(pkt->hdr + pkt->offset809809+ + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);810810+}811811+812812+static inline u32 aeth_msn(struct rxe_pkt_info *pkt)813813+{814814+ return __aeth_msn(pkt->hdr + pkt->offset815815+ + rxe_opcode[pkt->opcode].offset[RXE_AETH]);816816+}817817+818818+static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)819819+{820820+ __aeth_set_msn(pkt->hdr + pkt->offset821821+ + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);822822+}823823+824824+/******************************************************************************825825+ * Atomic Ack Extended Transport Header826826+ ******************************************************************************/827827+struct rxe_atmack {828828+ __be64 orig;829829+};830830+831831+static inline u64 __atmack_orig(void *arg)832832+{833833+ struct rxe_atmack *atmack = arg;834834+835835+ return be64_to_cpu(atmack->orig);836836+}837837+838838+static inline void __atmack_set_orig(void *arg, u64 orig)839839+{840840+ struct rxe_atmack *atmack = arg;841841+842842+ atmack->orig = cpu_to_be64(orig);843843+}844844+845845+static inline u64 atmack_orig(struct rxe_pkt_info *pkt)846846+{847847+ return __atmack_orig(pkt->hdr + pkt->offset848848+ + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);849849+}850850+851851+static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)852852+{853853+ __atmack_set_orig(pkt->hdr + 
pkt->offset854854+ + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);855855+}856856+857857+/******************************************************************************858858+ * Immediate Extended Transport Header859859+ ******************************************************************************/860860+struct rxe_immdt {861861+ __be32 imm;862862+};863863+864864+static inline __be32 __immdt_imm(void *arg)865865+{866866+ struct rxe_immdt *immdt = arg;867867+868868+ return immdt->imm;869869+}870870+871871+static inline void __immdt_set_imm(void *arg, __be32 imm)872872+{873873+ struct rxe_immdt *immdt = arg;874874+875875+ immdt->imm = imm;876876+}877877+878878+static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)879879+{880880+ return __immdt_imm(pkt->hdr + pkt->offset881881+ + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);882882+}883883+884884+static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)885885+{886886+ __immdt_set_imm(pkt->hdr + pkt->offset887887+ + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);888888+}889889+890890+/******************************************************************************891891+ * Invalidate Extended Transport Header892892+ ******************************************************************************/893893+struct rxe_ieth {894894+ __be32 rkey;895895+};896896+897897+static inline u32 __ieth_rkey(void *arg)898898+{899899+ struct rxe_ieth *ieth = arg;900900+901901+ return be32_to_cpu(ieth->rkey);902902+}903903+904904+static inline void __ieth_set_rkey(void *arg, u32 rkey)905905+{906906+ struct rxe_ieth *ieth = arg;907907+908908+ ieth->rkey = cpu_to_be32(rkey);909909+}910910+911911+static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)912912+{913913+ return __ieth_rkey(pkt->hdr + pkt->offset914914+ + rxe_opcode[pkt->opcode].offset[RXE_IETH]);915915+}916916+917917+static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)918918+{919919+ __ieth_set_rkey(pkt->hdr + pkt->offset920920+ + 
rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);921921+}922922+923923+enum rxe_hdr_length {924924+ RXE_BTH_BYTES = sizeof(struct rxe_bth),925925+ RXE_DETH_BYTES = sizeof(struct rxe_deth),926926+ RXE_IMMDT_BYTES = sizeof(struct rxe_immdt),927927+ RXE_RETH_BYTES = sizeof(struct rxe_reth),928928+ RXE_AETH_BYTES = sizeof(struct rxe_aeth),929929+ RXE_ATMACK_BYTES = sizeof(struct rxe_atmack),930930+ RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),931931+ RXE_IETH_BYTES = sizeof(struct rxe_ieth),932932+ RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),933933+};934934+935935+static inline size_t header_size(struct rxe_pkt_info *pkt)936936+{937937+ return pkt->offset + rxe_opcode[pkt->opcode].length;938938+}939939+940940+static inline void *payload_addr(struct rxe_pkt_info *pkt)941941+{942942+ return pkt->hdr + pkt->offset943943+ + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];944944+}945945+946946+static inline size_t payload_size(struct rxe_pkt_info *pkt)947947+{948948+ return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]949949+ - bth_pad(pkt) - RXE_ICRC_SIZE;950950+}951951+952952+#endif /* RXE_HDR_H */
+96
drivers/infiniband/sw/rxe/rxe_icrc.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+3737+/* Compute a partial ICRC for all the IB transport headers. 
*/3838+u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)3939+{4040+ unsigned int bth_offset = 0;4141+ struct iphdr *ip4h = NULL;4242+ struct ipv6hdr *ip6h = NULL;4343+ struct udphdr *udph;4444+ struct rxe_bth *bth;4545+ int crc;4646+ int length;4747+ int hdr_size = sizeof(struct udphdr) +4848+ (skb->protocol == htons(ETH_P_IP) ?4949+ sizeof(struct iphdr) : sizeof(struct ipv6hdr));5050+ /* pseudo header buffer size is calculate using ipv6 header size since5151+ * it is bigger than ipv45252+ */5353+ u8 pshdr[sizeof(struct udphdr) +5454+ sizeof(struct ipv6hdr) +5555+ RXE_BTH_BYTES];5656+5757+ /* This seed is the result of computing a CRC with a seed of5858+ * 0xfffffff and 8 bytes of 0xff representing a masked LRH.5959+ */6060+ crc = 0xdebb20e3;6161+6262+ if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */6363+ memcpy(pshdr, ip_hdr(skb), hdr_size);6464+ ip4h = (struct iphdr *)pshdr;6565+ udph = (struct udphdr *)(ip4h + 1);6666+6767+ ip4h->ttl = 0xff;6868+ ip4h->check = CSUM_MANGLED_0;6969+ ip4h->tos = 0xff;7070+ } else { /* IPv6 */7171+ memcpy(pshdr, ipv6_hdr(skb), hdr_size);7272+ ip6h = (struct ipv6hdr *)pshdr;7373+ udph = (struct udphdr *)(ip6h + 1);7474+7575+ memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));7676+ ip6h->priority = 0xf;7777+ ip6h->hop_limit = 0xff;7878+ }7979+ udph->check = CSUM_MANGLED_0;8080+8181+ bth_offset += hdr_size;8282+8383+ memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES);8484+ bth = (struct rxe_bth *)&pshdr[bth_offset];8585+8686+ /* exclude bth.resv8a */8787+ bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);8888+8989+ length = hdr_size + RXE_BTH_BYTES;9090+ crc = crc32_le(crc, pshdr, length);9191+9292+ /* And finish to compute the CRC on the remainder of the headers. */9393+ crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,9494+ rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);9595+ return crc;9696+}
+286
drivers/infiniband/sw/rxe/rxe_loc.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_LOC_H3535+#define RXE_LOC_H3636+3737+/* rxe_av.c */3838+3939+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);4040+4141+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,4242+ struct rxe_av *av, struct ib_ah_attr *attr);4343+4444+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,4545+ struct ib_ah_attr *attr);4646+4747+int rxe_av_fill_ip_info(struct rxe_dev *rxe,4848+ struct rxe_av *av,4949+ struct ib_ah_attr *attr,5050+ struct ib_gid_attr *sgid_attr,5151+ union ib_gid *sgid);5252+5353+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);5454+5555+/* rxe_cq.c */5656+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,5757+ int cqe, int comp_vector, struct ib_udata *udata);5858+5959+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,6060+ int comp_vector, struct ib_ucontext *context,6161+ struct ib_udata *udata);6262+6363+int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);6464+6565+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);6666+6767+void rxe_cq_cleanup(void *arg);6868+6969+/* rxe_mcast.c */7070+int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,7171+ struct rxe_mc_grp **grp_p);7272+7373+int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,7474+ struct rxe_mc_grp *grp);7575+7676+int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,7777+ union ib_gid *mgid);7878+7979+void rxe_drop_all_mcast_groups(struct rxe_qp *qp);8080+8181+void rxe_mc_cleanup(void *arg);8282+8383+/* rxe_mmap.c */8484+struct rxe_mmap_info {8585+ struct list_head pending_mmaps;8686+ struct ib_ucontext *context;8787+ struct kref ref;8888+ void 
	*obj;

	/* offset/size cookie handed to userspace for its mmap() call */
	struct mminfo info;
};

/* rxe_mmap.c */
void rxe_mmap_release(struct kref *ref);

struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
					   u32 size,
					   struct ib_ucontext *context,
					   void *obj);

int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);

/* rxe_mr.c */
/* direction of a copy between a memory object and a flat buffer */
enum copy_direction {
	to_mem_obj,
	from_mem_obj,
};

int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem);

int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mr);

int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem);

int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
		 int length, enum copy_direction dir, u32 *crcp);

int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
	      struct rxe_dma_info *dma, void *addr, int length,
	      enum copy_direction dir, u32 *crcp);

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length);

/* which key of a memory object a lookup should match */
enum lookup_type {
	lookup_local,	/* match mem->lkey */
	lookup_remote,	/* match mem->rkey */
};

struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type);

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);

int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova);

void rxe_mem_cleanup(void *arg);

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);

/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);

int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
		     struct ib_qp_init_attr *init, struct ib_udata *udata,
		     struct ib_pd *ibpd);

int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);

int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
		    struct ib_qp_attr *attr, int mask);

int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
		     int mask, struct ib_udata *udata);

int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);

void rxe_qp_error(struct rxe_qp *qp);

void rxe_qp_destroy(struct rxe_qp *qp);

void rxe_qp_cleanup(void *arg);

/* QP number as seen by the verbs layer */
static inline int qp_num(struct rxe_qp *qp)
{
	return qp->ibqp.qp_num;
}

static inline enum ib_qp_type qp_type(struct rxe_qp *qp)
{
	return qp->ibqp.qp_type;
}

static inline enum ib_qp_state qp_state(struct rxe_qp *qp)
{
	return qp->attr.qp_state;
}

/* connected QP types use the negotiated path MTU; all other QP types
 * are limited only by the port maximum
 */
static inline int qp_mtu(struct rxe_qp *qp)
{
	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
		return qp->attr.path_mtu;
	else
		return RXE_PORT_MAX_MTU;
}

/* byte size of a receive WQE carrying max_sge scatter entries */
static inline int rcv_wqe_size(int max_sge)
{
	return sizeof(struct rxe_recv_wqe) +
		max_sge * sizeof(struct ib_sge);
}

void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);

/* advance the responder resource ring head, wrapping at max_rd_atomic */
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
	qp->resp.res_head++;
	if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic))
		qp->resp.res_head = 0;
}

void retransmit_timer(unsigned long data);
void rnr_nak_timer(unsigned long data);

void dump_qp(struct rxe_qp *qp);

/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)

int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);

int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
		      struct ib_srq_init_attr *init,
		      struct ib_ucontext *context, struct ib_udata *udata);

int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
		      struct ib_udata *udata);

extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;

void rxe_release(struct kref *kref);

/* entry points of the three packet-processing state machines */
int rxe_completer(void *arg);
int rxe_requester(void *arg);
int rxe_responder(void *arg);

u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);

void rxe_resp_queue_pkt(struct rxe_dev *rxe,
			struct rxe_qp *qp, struct sk_buff *skb);

void rxe_comp_queue_pkt(struct rxe_dev *rxe,
			struct rxe_qp *qp, struct sk_buff *skb);

/* capability mask of a work request opcode for this QP's type */
static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
	return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}

/* Hand one packet to the lower layer (or loop it back locally).
 *
 * If the QP is not in the ready state the skb is freed and 0 is
 * returned (silent drop).  On a successful send, non-RC QPs that just
 * transmitted the final fragment of a request kick the completion
 * task themselves, since no ACK will arrive to do it.  Returns 0 on
 * success or the error from the interface ops.
 */
static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
	int err;
	int is_request = pkt->mask & RXE_REQ_MASK;

	if ((is_request && (qp->req.state != QP_STATE_READY)) ||
	    (!is_request && (qp->resp.state != QP_STATE_READY))) {
		pr_info("Packet dropped. QP is not in ready state\n");
		goto drop;
	}

	if (pkt->mask & RXE_LOOPBACK_MASK) {
		memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
		err = rxe->ifc_ops->loopback(skb);
	} else {
		err = rxe->ifc_ops->send(rxe, pkt, skb);
	}

	if (err) {
		rxe->xmit_errors++;
		return err;
	}

	atomic_inc(&qp->skb_out);

	if ((qp_type(qp) != IB_QPT_RC) &&
	    (pkt->mask & RXE_END_MASK)) {
		pkt->wqe->state = wqe_state_done;
		rxe_run_task(&qp->comp.task, 1);
	}

	goto done;

drop:
	kfree_skb(skb);
	err = 0;
done:
	return err;
}

#endif /* RXE_LOC_H */
+190
drivers/infiniband/sw/rxe/rxe_mcast.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Look up the multicast group for mgid, creating it (and programming
 * the mgid into the lower layer) on first use.
 *
 * On success *grp_p holds a reference to the group that the caller
 * must drop: either the reference returned by rxe_pool_get_key() or
 * the initial reference from rxe_alloc().
 */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
		      struct rxe_mc_grp **grp_p)
{
	int err;
	struct rxe_mc_grp *grp;

	if (rxe->attr.max_mcast_qp_attach == 0) {
		err = -EINVAL;
		goto err1;
	}

	grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
	if (grp)
		goto done;

	grp = rxe_alloc(&rxe->mc_grp_pool);
	if (!grp) {
		err = -ENOMEM;
		goto err1;
	}

	INIT_LIST_HEAD(&grp->qp_list);
	spin_lock_init(&grp->mcg_lock);
	grp->rxe = rxe;

	rxe_add_key(grp, mgid);

	err = rxe->ifc_ops->mcast_add(rxe, mgid);
	if (err)
		goto err2;

done:
	*grp_p = grp;
	return 0;

err2:
	rxe_drop_ref(grp);
err1:
	return err;
}

/* Attach qp to the multicast group.  Idempotent: attaching an already
 * attached QP returns 0.  Lock order is qp->grp_lock then
 * grp->mcg_lock; rxe_mcast_drop_grp_elem() below must match it.
 */
int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
			   struct rxe_mc_grp *grp)
{
	int err;
	struct rxe_mc_elem *elem;

	/* check to see if the qp is already a member of the group */
	spin_lock_bh(&qp->grp_lock);
	spin_lock_bh(&grp->mcg_lock);
	list_for_each_entry(elem, &grp->qp_list, qp_list) {
		if (elem->qp == qp) {
			err = 0;
			goto out;
		}
	}

	if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
		err = -ENOMEM;
		goto out;
	}

	/* NOTE(review): rxe_alloc() is called with both spinlocks held;
	 * presumably the pool allocates atomically here - confirm.
	 */
	elem = rxe_alloc(&rxe->mc_elem_pool);
	if (!elem) {
		err = -ENOMEM;
		goto out;
	}

	/* each qp holds a ref on the grp */
	rxe_add_ref(grp);

	grp->num_qp++;
	elem->qp = qp;
	elem->grp = grp;

	list_add(&elem->qp_list, &grp->qp_list);
	list_add(&elem->grp_list, &qp->grp_list);

	err = 0;
out:
	spin_unlock_bh(&grp->mcg_lock);
	spin_unlock_bh(&qp->grp_lock);
	return err;
}

/* Detach qp from the group identified by mgid.
 *
 * Returns 0 if the QP was attached, -EINVAL otherwise.  References are
 * dropped only after both locks are released, since dropping the last
 * ref can free the object.
 */
int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
			    union ib_gid *mgid)
{
	struct rxe_mc_grp *grp;
	struct rxe_mc_elem *elem, *tmp;

	grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
	if (!grp)
		goto err1;

	spin_lock_bh(&qp->grp_lock);
	spin_lock_bh(&grp->mcg_lock);

	list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
		if (elem->qp == qp) {
			list_del(&elem->qp_list);
			list_del(&elem->grp_list);
			grp->num_qp--;

			spin_unlock_bh(&grp->mcg_lock);
			spin_unlock_bh(&qp->grp_lock);
			rxe_drop_ref(elem);
			rxe_drop_ref(grp);	/* ref held by QP */
			rxe_drop_ref(grp);	/* ref from get_key */
			return 0;
		}
	}

	spin_unlock_bh(&grp->mcg_lock);
	spin_unlock_bh(&qp->grp_lock);
	rxe_drop_ref(grp);			/* ref from get_key */
err1:
	return -EINVAL;
}

/* Detach qp from every group it is a member of (QP destroy path).
 *
 * The qp lock is dropped and re-taken each iteration so the two locks
 * are never held at once in the reverse of the attach order.
 */
void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
{
	struct rxe_mc_grp *grp;
	struct rxe_mc_elem *elem;

	while (1) {
		spin_lock_bh(&qp->grp_lock);
		if (list_empty(&qp->grp_list)) {
			spin_unlock_bh(&qp->grp_lock);
			break;
		}
		elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
					grp_list);
		list_del(&elem->grp_list);
		spin_unlock_bh(&qp->grp_lock);

		grp = elem->grp;
		spin_lock_bh(&grp->mcg_lock);
		list_del(&elem->qp_list);
		grp->num_qp--;
		spin_unlock_bh(&grp->mcg_lock);
		rxe_drop_ref(grp);
		rxe_drop_ref(elem);
	}
}

/* Pool cleanup callback: runs when the last reference to a group is
 * dropped; removes the key and unprograms the mgid from the netdev.
 */
void rxe_mc_cleanup(void *arg)
{
	struct rxe_mc_grp *grp = arg;
	struct rxe_dev *rxe = grp->rxe;

	rxe_drop_key(grp);
	rxe->ifc_ops->mcast_delete(rxe, &grp->mgid);
}
+173
drivers/infiniband/sw/rxe/rxe_mmap.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>

#include "rxe.h"
#include "rxe_loc.h"

/* kref release: unlink from the pending-mmap list (if still queued)
 * and free both the vmalloc'ed object and the tracking struct.
 */
void rxe_mmap_release(struct kref *ref)
{
	struct rxe_mmap_info *ip = container_of(ref,
					struct rxe_mmap_info, ref);
	struct rxe_dev *rxe = to_rdev(ip->context->device);

	spin_lock_bh(&rxe->pending_lock);

	if (!list_empty(&ip->pending_mmaps))
		list_del(&ip->pending_mmaps);

	spin_unlock_bh(&rxe->pending_lock);

	vfree(ip->obj);		/* buf */
	kfree(ip);
}

/*
 * open and close keep track of how many times the memory region is mapped,
 * to avoid releasing it.
 */
static void rxe_vma_open(struct vm_area_struct *vma)
{
	struct rxe_mmap_info *ip = vma->vm_private_data;

	kref_get(&ip->ref);
}

static void rxe_vma_close(struct vm_area_struct *vma)
{
	struct rxe_mmap_info *ip = vma->vm_private_data;

	kref_put(&ip->ref, rxe_mmap_release);
}

/* NOTE(review): could be declared const - nothing here writes to it */
static struct vm_operations_struct rxe_vm_ops = {
	.open = rxe_vma_open,
	.close = rxe_vma_close,
};

/**
 * rxe_mmap - create a new mmap region
 * @context: the IB user context of the process making the mmap() call
 * @vma: the VMA to be initialized
 * Return zero if the mmap is OK. Otherwise, return an errno.
 */
int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct rxe_dev *rxe = to_rdev(context->device);
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct rxe_mmap_info *ip, *pp;
	int ret;

	/*
	 * Search the device's list of objects waiting for a mmap call.
	 * Normally, this list is very short since a call to create a
	 * CQ, QP, or SRQ is soon followed by a call to mmap().
	 */
	spin_lock_bh(&rxe->pending_lock);
	list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) {
		/* the pgoff encodes the per-device offset cookie that
		 * rxe_create_mmap_info() handed to userspace
		 */
		if (context != ip->context || (__u64)offset != ip->info.offset)
			continue;

		/* Don't allow a mmap larger than the object. */
		if (size > ip->info.size) {
			pr_err("mmap region is larger than the object!\n");
			spin_unlock_bh(&rxe->pending_lock);
			ret = -EINVAL;
			goto done;
		}

		goto found_it;
	}
	pr_warn("unable to find pending mmap info\n");
	spin_unlock_bh(&rxe->pending_lock);
	ret = -EINVAL;
	goto done;

found_it:
	/* list_del_init so rxe_mmap_release() sees an empty entry */
	list_del_init(&ip->pending_mmaps);
	spin_unlock_bh(&rxe->pending_lock);

	ret = remap_vmalloc_range(vma, ip->obj, 0);
	if (ret) {
		pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
		goto done;
	}

	vma->vm_ops = &rxe_vm_ops;
	vma->vm_private_data = ip;
	/* take the reference for this mapping explicitly */
	rxe_vma_open(vma);
done:
	return ret;
}

/*
 * Allocate information for rxe_mmap
 *
 * Reserves a unique page-aligned offset range in the device's mmap
 * space and returns a tracking struct holding one initial kref.
 * NOTE(review): returns NULL (not an ERR_PTR) on allocation failure -
 * callers must check for NULL.
 */
struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
					   u32 size,
					   struct ib_ucontext *context,
					   void *obj)
{
	struct rxe_mmap_info *ip;

	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
	if (!ip)
		return NULL;

	size = PAGE_ALIGN(size);

	spin_lock_bh(&rxe->mmap_offset_lock);

	/* offset 0 is never handed out so it can't collide with an
	 * "unset" pgoff
	 */
	if (rxe->mmap_offset == 0)
		rxe->mmap_offset = PAGE_SIZE;

	ip->info.offset = rxe->mmap_offset;
	rxe->mmap_offset += size;

	spin_unlock_bh(&rxe->mmap_offset_lock);

	INIT_LIST_HEAD(&ip->pending_mmaps);
	ip->info.size = size;
	ip->context = context;
	ip->obj = obj;
	kref_init(&ip->ref);

	return ip;
}
+643
drivers/infiniband/sw/rxe/rxe_mr.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 *
 * Supplies the low 8 "variant" bits of each new lkey/rkey so that a
 * recycled pool index does not immediately produce the same key.
 */
static u8 rxe_get_key(void)
{
	static unsigned key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

/* Check that [iova, iova+length) lies inside the registered region.
 * DMA memory objects cover the whole address space, so any range is
 * acceptable.  Returns 0 if in range, -EFAULT otherwise.
 */
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
	case RXE_MEM_TYPE_FMR:
		return ((iova < mem->iova) ||
			((iova + length) > (mem->iova + mem->length))) ?
			-EFAULT : 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

/* Common init: generate the lkey (index in the high 24 bits, lfsr
 * variant in the low 8) and an rkey only when remote access was
 * requested.  The object starts INVALID/NONE; the type-specific init
 * functions promote it.
 */
static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	if (mem->pelem.pool->type == RXE_TYPE_MR) {
		mem->ibmr.lkey		= lkey;
		mem->ibmr.rkey		= rkey;
	}

	mem->lkey		= lkey;
	mem->rkey		= rkey;
	mem->state		= RXE_MEM_STATE_INVALID;
	mem->type		= RXE_MEM_TYPE_NONE;
	mem->map_shift		= ilog2(RXE_BUF_PER_MAP);
}

/* Pool cleanup callback: release the pinned user memory (if any) and
 * the two-level map table.
 */
void rxe_mem_cleanup(void *arg)
{
	struct rxe_mem *mem = arg;
	int i;

	if (mem->umem)
		ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}

/* Allocate the two-level map table able to describe num_buf physical
 * buffers (RXE_BUF_PER_MAP buffers per second-level map).  Returns 0
 * or -ENOMEM; partial allocations are freed before returning.
 */
static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift	= ilog2(RXE_BUF_PER_MAP);
	mem->map_mask	= RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}

/* Initialize a DMA memory object: no map table, iova == vaddr. */
int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->pd			= pd;
	mem->access		= access;
	mem->state		= RXE_MEM_STATE_VALID;
	mem->type		= RXE_MEM_TYPE_DMA;

	return 0;
}

/* Register a user memory region: pin the pages with ib_umem_get() and
 * record each page's kernel address in the map table.
 */
int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	int			entry;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct scatterlist	*sg;
	int			num_buf;
	void			*vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
	if (IS_ERR(umem)) {
		pr_warn("err %d from rxe_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = umem->nmap;

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(rxe, mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		/* NOTE(review): mem->umem was already set above, so if the
		 * caller later runs rxe_mem_cleanup() on this mem the umem
		 * would be released a second time - verify the error path
		 * taken by callers.
		 */
		ib_umem_release(umem);
		goto err1;
	}

	WARN_ON(!is_power_of_2(umem->page_size));

	mem->page_shift		= ilog2(umem->page_size);
	mem->page_mask		= umem->page_size - 1;

	num_buf			= 0;
	map			= mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
			vaddr = page_address(sg_page(sg));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = umem->page_size;
			num_buf++;
			buf++;

			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}
		}
	}

	mem->pd			= pd;
	mem->umem		= umem;
	mem->access		= access;
	mem->length		= length;
	mem->iova		= iova;
	mem->va			= start;
	mem->offset		= ib_umem_offset(umem);
	mem->state		= RXE_MEM_STATE_VALID;
	mem->type		= RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

/* Initialize a fast-register MR: map table sized for max_pages, no
 * pages attached yet (state FREE until rxe_mem_map_pages()).
 */
int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(rxe, mem, max_pages);
	if (err)
		goto err1;

	mem->pd			= pd;
	mem->max_buf		= max_pages;
	mem->state		= RXE_MEM_STATE_FREE;
	mem->type		= RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

/* Translate iova into map index (m), buffer index (n) and byte offset
 * within that buffer.  The fast path applies when all buffers share a
 * power-of-two size (page_shift set); otherwise walk the buffers
 * accumulating their individual sizes.
 */
static void lookup_iova(
	struct rxe_mem	*mem,
	u64			iova,
	int			*m_out,
	int			*n_out,
	size_t			*offset_out)
{
	size_t			offset = iova - mem->iova + mem->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mem->page_shift)) {
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

/* Return the kernel virtual address backing [iova, iova+length) or
 * NULL if the object is invalid, the range is out of bounds, or the
 * range crosses a buffer boundary.  DMA objects (no map) treat iova
 * as a kernel address directly.
 */
void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;
	u32			crc = crcp ? (*crcp) : 0;

	/* DMA objects: iova is already a kernel address; single memcpy */
	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src  = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		if (crcp)
			*crcp = crc32_le(*crcp, src, length);

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map	= mem->map + m;
	buf	= map[0]->buf + i;

	/* walk buffer by buffer, advancing to the next map row when the
	 * buffer index wraps
	 */
	while (length > 0) {
		u8 *src, *dest;

		va	= (u8 *)(uintptr_t)buf->addr + offset;
		src  = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes	= buf->size - offset;

		if (bytes > length)
			bytes = length;

		if (crcp)
			crc = crc32_le(crc, src, bytes);

		memcpy(dest, src, bytes);

		length	-= bytes;
		addr	+= bytes;

		offset	= 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 *
 * NOTE(review): on an error return, dma->cur_sge may already have been
 * advanced while dma->sge_offset and dma->resid are only written on
 * success, leaving the descriptor partially updated - confirm callers
 * treat the dma state as dead after an error.
 */
int copy_data(
	struct rxe_dev		*rxe,
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum copy_direction	dir,
	u32			*crcp)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mem		*mem	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			/* current sge exhausted: drop its mem ref and
			 * move to the next one (zero-length sges are
			 * skipped without a lookup)
			 */
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}

/* Skip length bytes of the dma descriptor's sg list without copying
 * (e.g. for a read response that lands beyond the current position).
 * Returns 0 or -ENOSPC if the sg list is exhausted.
 */
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 *
 * On success the returned mem holds a reference that the caller must
 * drop; on any check failure the reference is dropped here and NULL
 * is returned.
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
		mem = rxe_pool_get_index(&rxe->mr_pool, index);
		if (!mem)
			goto err1;
	} else {
		goto err1;
	}

	if ((type == lookup_local && mem->lkey != key) ||
	    (type == lookup_remote && mem->rkey != key))
		goto err2;

	if (mem->pd != pd)
		goto err2;

	if (access && !(access & mem->access))
		goto err2;

	if (mem->state != RXE_MEM_STATE_VALID)
		goto err2;

	return mem;

err2:
	rxe_drop_ref(mem);
err1:
	return NULL;
}

/* Populate a fast-register MR's map table from an array of page
 * addresses and mark the mem valid.  Returns -EINVAL if num_pages
 * exceeds the table allocated by rxe_mem_init_fast().
 */
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova)
{
	int			i;
	int			num_buf;
	int			err;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			page_size;

	if (num_pages > mem->max_buf) {
		err = -EINVAL;
		goto err1;
	}

	num_buf		= 0;
	page_size	= 1 << mem->page_shift;
	map		= mem->map;
	buf		= map[0]->buf;

	for (i = 0; i < num_pages; i++) {
		buf->addr = *page++;
		buf->size = page_size;
		buf++;
		num_buf++;

		if (num_buf == RXE_BUF_PER_MAP) {
			map++;
			buf = map[0]->buf;
			num_buf = 0;
		}
	}

	mem->iova	= iova;
	mem->va		= iova;
	mem->length	= num_pages << mem->page_shift;
	mem->state	= RXE_MEM_STATE_VALID;

	return 0;

err1:
	return err;
}
+708
drivers/infiniband/sw/rxe/rxe_net.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/sch_generic.h>
#include <linux/netfilter.h>
#include <rdma/ib_addr.h>

#include "rxe.h"
#include "rxe_net.h"
#include "rxe_loc.h"

static LIST_HEAD(rxe_dev_list);
/* spinlock for device list
 * NOTE(review): not statically initialized here - assumes
 * spin_lock_init() runs during module init before first use; confirm.
 */
static spinlock_t dev_list_lock;

/* Find the rxe device bound to ndev, or NULL. */
struct rxe_dev *net_to_rxe(struct net_device *ndev)
{
	struct rxe_dev *rxe;
	struct rxe_dev *found = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(rxe, &rxe_dev_list, list) {
		if (rxe->ndev == ndev) {
			found = rxe;
			break;
		}
	}
	spin_unlock_bh(&dev_list_lock);

	return found;
}

/* Find the rxe device whose IB device name matches, or NULL. */
struct rxe_dev *get_rxe_by_name(const char* name)
{
	struct rxe_dev *rxe;
	struct rxe_dev *found = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(rxe, &rxe_dev_list, list) {
		if (!strcmp(name, rxe->ib_dev.name)) {
			found = rxe;
			break;
		}
	}
	spin_unlock_bh(&dev_list_lock);
	return found;
}


struct rxe_recv_sockets recv_sockets;

/* Derive an EUI-64 from the netdev MAC address (flip the local bit,
 * insert ff:fe in the middle per RFC 4291).
 */
static __be64 rxe_mac_to_eui64(struct net_device *ndev)
{
	unsigned char *mac_addr = ndev->dev_addr;
	__be64 eui64;
	unsigned char *dst = (unsigned char *)&eui64;

	dst[0] = mac_addr[0] ^ 2;
	dst[1] = mac_addr[1];
	dst[2] = mac_addr[2];
	dst[3] = 0xff;
	dst[4] = 0xfe;
	dst[5] = mac_addr[3];
	dst[6] = mac_addr[4];
	dst[7] = mac_addr[5];

	return eui64;
}

static __be64 node_guid(struct rxe_dev *rxe)
{
	return rxe_mac_to_eui64(rxe->ndev);
}

static __be64 port_guid(struct rxe_dev *rxe)
{
	return rxe_mac_to_eui64(rxe->ndev);
}

/* Device used for DMA mapping: the real device behind a VLAN netdev. */
static struct device *dma_device(struct rxe_dev *rxe)
{
	struct net_device *ndev;

	ndev = rxe->ndev;

	if (ndev->priv_flags & IFF_802_1Q_VLAN)
		ndev = vlan_dev_real_dev(ndev);

	return ndev->dev.parent;
}

/* Program a multicast GID into the netdev's MC filter. */
static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	int err;
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
	err = dev_mc_add(rxe->ndev, ll_addr);

	return err;
}

/* Remove a multicast GID from the netdev's MC filter. */
static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
{
	int err;
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
	err = dev_mc_del(rxe->ndev, ll_addr);

	return err;
}

/* Route lookup for an outgoing IPv4 UDP packet; returns the dst entry
 * (caller releases) or NULL when no route exists.
 */
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
				  struct in_addr *saddr,
				  struct in_addr *daddr)
{
	struct rtable *rt;
	struct flowi4 fl = { { 0 } };

	/* NOTE(review): the initializer above already zeroes fl, making
	 * this memset redundant (harmless).
	 */
	memset(&fl, 0, sizeof(fl));
	fl.flowi4_oif = ndev->ifindex;
	memcpy(&fl.saddr, saddr, sizeof(*saddr));
	memcpy(&fl.daddr, daddr, sizeof(*daddr));
	fl.flowi4_proto = IPPROTO_UDP;

	rt = ip_route_output_key(&init_net, &fl);
	if (IS_ERR(rt)) {
		pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
		return NULL;
	}

	return &rt->dst;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of rxe_find_route4. */
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	struct dst_entry *ndst;
	struct flowi6 fl6 = { { 0 } };

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = ndev->ifindex;
	memcpy(&fl6.saddr, saddr, sizeof(*saddr));
	memcpy(&fl6.daddr, daddr, sizeof(*daddr));
	fl6.flowi6_proto = IPPROTO_UDP;

	if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
						recv_sockets.sk6->sk, &ndst, &fl6))) {
		pr_err_ratelimited("no route to %pI6\n", daddr);
		/* NOTE(review): if the lookup itself failed, ndst may be
		 * uninitialized when dst_release() runs below - confirm
		 * ipv6_dst_lookup()'s contract on its out parameter.
		 */
		goto put;
	}

	if (unlikely(ndst->error)) {
		pr_err("no route to %pI6\n", daddr);
		goto put;
	}

	return ndst;
put:
	dst_release(ndst);
	return NULL;
}

#else

/* Stub when IPv6 is compiled out: no route can ever be found. */
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	return NULL;
}

#endif

/* UDP tunnel encap_rcv callback: fill in the per-skb rxe packet info
 * and hand the packet to rxe_rcv().  Packets for unknown devices or
 * that cannot be linearized are dropped (returning 0 consumes them).
 */
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct udphdr *udph;
	struct net_device *ndev = skb->dev;
	struct rxe_dev *rxe = net_to_rxe(ndev);
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	if (!rxe)
		goto drop;

	if (skb_linearize(skb)) {
		pr_err("skb_linearize failed\n");
		goto drop;
	}

	udph = udp_hdr(skb);
	pkt->rxe = rxe;
	pkt->port_num = 1;
	pkt->hdr = (u8 *)(udph + 1);
	pkt->mask = RXE_GRH_MASK;
	pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);

	return rxe_rcv(skb);
drop:
	kfree_skb(skb);
	return 0;
}

/* Create the kernel UDP socket used as the RoCEv2 tunnel endpoint. */
static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
					   bool ipv6)
{
	int err;
	struct socket *sock;
	struct udp_port_cfg udp_cfg;
	struct udp_tunnel_sock_cfg tnl_cfg;

	memset(&udp_cfg, 0, sizeof(udp_cfg));

	if (ipv6) {
		udp_cfg.family = AF_INET6;
		udp_cfg.ipv6_v6only = 1;
	} else {
		udp_cfg.family =
AF_INET;256256+ }257257+258258+ udp_cfg.local_udp_port = port;259259+260260+ /* Create UDP socket */261261+ err = udp_sock_create(net, &udp_cfg, &sock);262262+ if (err < 0) {263263+ pr_err("failed to create udp socket. err = %d\n", err);264264+ return ERR_PTR(err);265265+ }266266+267267+ tnl_cfg.sk_user_data = NULL;268268+ tnl_cfg.encap_type = 1;269269+ tnl_cfg.encap_rcv = rxe_udp_encap_recv;270270+ tnl_cfg.encap_destroy = NULL;271271+272272+ /* Setup UDP tunnel */273273+ setup_udp_tunnel_sock(net, sock, &tnl_cfg);274274+275275+ return sock;276276+}277277+278278+static void rxe_release_udp_tunnel(struct socket *sk)279279+{280280+ udp_tunnel_sock_release(sk);281281+}282282+283283+static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,284284+ __be16 dst_port)285285+{286286+ struct udphdr *udph;287287+288288+ __skb_push(skb, sizeof(*udph));289289+ skb_reset_transport_header(skb);290290+ udph = udp_hdr(skb);291291+292292+ udph->dest = dst_port;293293+ udph->source = src_port;294294+ udph->len = htons(skb->len);295295+ udph->check = 0;296296+}297297+298298+static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,299299+ __be32 saddr, __be32 daddr, __u8 proto,300300+ __u8 tos, __u8 ttl, __be16 df, bool xnet)301301+{302302+ struct iphdr *iph;303303+304304+ skb_scrub_packet(skb, xnet);305305+306306+ skb_clear_hash(skb);307307+ skb_dst_set(skb, dst);308308+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));309309+310310+ skb_push(skb, sizeof(struct iphdr));311311+ skb_reset_network_header(skb);312312+313313+ iph = ip_hdr(skb);314314+315315+ iph->version = IPVERSION;316316+ iph->ihl = sizeof(struct iphdr) >> 2;317317+ iph->frag_off = df;318318+ iph->protocol = proto;319319+ iph->tos = tos;320320+ iph->daddr = daddr;321321+ iph->saddr = saddr;322322+ iph->ttl = ttl;323323+ __ip_select_ident(dev_net(dst->dev), iph,324324+ skb_shinfo(skb)->gso_segs ?: 1);325325+ iph->tot_len = htons(skb->len);326326+ ip_send_check(iph);327327+}328328+329329+static void 
prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,330330+ struct in6_addr *saddr, struct in6_addr *daddr,331331+ __u8 proto, __u8 prio, __u8 ttl)332332+{333333+ struct ipv6hdr *ip6h;334334+335335+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));336336+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED337337+ | IPSKB_REROUTED);338338+ skb_dst_set(skb, dst);339339+340340+ __skb_push(skb, sizeof(*ip6h));341341+ skb_reset_network_header(skb);342342+ ip6h = ipv6_hdr(skb);343343+ ip6_flow_hdr(ip6h, prio, htonl(0));344344+ ip6h->payload_len = htons(skb->len);345345+ ip6h->nexthdr = proto;346346+ ip6h->hop_limit = ttl;347347+ ip6h->daddr = *daddr;348348+ ip6h->saddr = *saddr;349349+ ip6h->payload_len = htons(skb->len - sizeof(*ip6h));350350+}351351+352352+static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)353353+{354354+ struct dst_entry *dst;355355+ bool xnet = false;356356+ __be16 df = htons(IP_DF);357357+ struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;358358+ struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;359359+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);360360+361361+ dst = rxe_find_route4(rxe->ndev, saddr, daddr);362362+ if (!dst) {363363+ pr_err("Host not reachable\n");364364+ return -EHOSTUNREACH;365365+ }366366+367367+ if (!memcmp(saddr, daddr, sizeof(*daddr)))368368+ pkt->mask |= RXE_LOOPBACK_MASK;369369+370370+ prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),371371+ htons(ROCE_V2_UDP_DPORT));372372+373373+ prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,374374+ av->grh.traffic_class, av->grh.hop_limit, df, xnet);375375+ return 0;376376+}377377+378378+static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)379379+{380380+ struct dst_entry *dst;381381+ struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;382382+ struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;383383+ struct rxe_pkt_info *pkt = 
SKB_TO_PKT(skb);384384+385385+ dst = rxe_find_route6(rxe->ndev, saddr, daddr);386386+ if (!dst) {387387+ pr_err("Host not reachable\n");388388+ return -EHOSTUNREACH;389389+ }390390+391391+ if (!memcmp(saddr, daddr, sizeof(*daddr)))392392+ pkt->mask |= RXE_LOOPBACK_MASK;393393+394394+ prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),395395+ htons(ROCE_V2_UDP_DPORT));396396+397397+ prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,398398+ av->grh.traffic_class,399399+ av->grh.hop_limit);400400+ return 0;401401+}402402+403403+static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,404404+ struct sk_buff *skb, u32 *crc)405405+{406406+ int err = 0;407407+ struct rxe_av *av = rxe_get_av(pkt);408408+409409+ if (av->network_type == RDMA_NETWORK_IPV4)410410+ err = prepare4(rxe, skb, av);411411+ else if (av->network_type == RDMA_NETWORK_IPV6)412412+ err = prepare6(rxe, skb, av);413413+414414+ *crc = rxe_icrc_hdr(pkt, skb);415415+416416+ return err;417417+}418418+419419+static void rxe_skb_tx_dtor(struct sk_buff *skb)420420+{421421+ struct sock *sk = skb->sk;422422+ struct rxe_qp *qp = sk->sk_user_data;423423+ int skb_out = atomic_dec_return(&qp->skb_out);424424+425425+ if (unlikely(qp->need_req_skb &&426426+ skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))427427+ rxe_run_task(&qp->req.task, 1);428428+}429429+430430+static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,431431+ struct sk_buff *skb)432432+{433433+ struct sk_buff *nskb;434434+ struct rxe_av *av;435435+ int err;436436+437437+ av = rxe_get_av(pkt);438438+439439+ nskb = skb_clone(skb, GFP_ATOMIC);440440+ if (!nskb)441441+ return -ENOMEM;442442+443443+ nskb->destructor = rxe_skb_tx_dtor;444444+ nskb->sk = pkt->qp->sk->sk;445445+446446+ if (av->network_type == RDMA_NETWORK_IPV4) {447447+ err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);448448+ } else if (av->network_type == RDMA_NETWORK_IPV6) {449449+ err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);450450+ } else {451451+ 
pr_err("Unknown layer 3 protocol: %d\n", av->network_type);452452+ kfree_skb(nskb);453453+ return -EINVAL;454454+ }455455+456456+ if (unlikely(net_xmit_eval(err))) {457457+ pr_debug("error sending packet: %d\n", err);458458+ return -EAGAIN;459459+ }460460+461461+ kfree_skb(skb);462462+463463+ return 0;464464+}465465+466466+static int loopback(struct sk_buff *skb)467467+{468468+ return rxe_rcv(skb);469469+}470470+471471+static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)472472+{473473+ return rxe->port.port_guid == av->grh.dgid.global.interface_id;474474+}475475+476476+static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,477477+ int paylen, struct rxe_pkt_info *pkt)478478+{479479+ unsigned int hdr_len;480480+ struct sk_buff *skb;481481+482482+ if (av->network_type == RDMA_NETWORK_IPV4)483483+ hdr_len = ETH_HLEN + sizeof(struct udphdr) +484484+ sizeof(struct iphdr);485485+ else486486+ hdr_len = ETH_HLEN + sizeof(struct udphdr) +487487+ sizeof(struct ipv6hdr);488488+489489+ skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),490490+ GFP_ATOMIC);491491+ if (unlikely(!skb))492492+ return NULL;493493+494494+ skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));495495+496496+ skb->dev = rxe->ndev;497497+ if (av->network_type == RDMA_NETWORK_IPV4)498498+ skb->protocol = htons(ETH_P_IP);499499+ else500500+ skb->protocol = htons(ETH_P_IPV6);501501+502502+ pkt->rxe = rxe;503503+ pkt->port_num = 1;504504+ pkt->hdr = skb_put(skb, paylen);505505+ pkt->mask |= RXE_GRH_MASK;506506+507507+ memset(pkt->hdr, 0, paylen);508508+509509+ return skb;510510+}511511+512512+/*513513+ * this is required by rxe_cfg to match rxe devices in514514+ * /sys/class/infiniband up with their underlying ethernet devices515515+ */516516+static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)517517+{518518+ return rxe->ndev->name;519519+}520520+521521+static enum rdma_link_layer link_layer(struct rxe_dev *rxe,522522+ unsigned int 
port_num)523523+{524524+ return IB_LINK_LAYER_ETHERNET;525525+}526526+527527+static struct rxe_ifc_ops ifc_ops = {528528+ .node_guid = node_guid,529529+ .port_guid = port_guid,530530+ .dma_device = dma_device,531531+ .mcast_add = mcast_add,532532+ .mcast_delete = mcast_delete,533533+ .prepare = prepare,534534+ .send = send,535535+ .loopback = loopback,536536+ .init_packet = init_packet,537537+ .parent_name = parent_name,538538+ .link_layer = link_layer,539539+};540540+541541+struct rxe_dev *rxe_net_add(struct net_device *ndev)542542+{543543+ int err;544544+ struct rxe_dev *rxe = NULL;545545+546546+ rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));547547+ if (!rxe)548548+ return NULL;549549+550550+ rxe->ifc_ops = &ifc_ops;551551+ rxe->ndev = ndev;552552+553553+ err = rxe_add(rxe, ndev->mtu);554554+ if (err) {555555+ ib_dealloc_device(&rxe->ib_dev);556556+ return NULL;557557+ }558558+559559+ spin_lock_bh(&dev_list_lock);560560+ list_add_tail(&rxe_dev_list, &rxe->list);561561+ spin_unlock_bh(&dev_list_lock);562562+ return rxe;563563+}564564+565565+void rxe_remove_all(void)566566+{567567+ spin_lock_bh(&dev_list_lock);568568+ while (!list_empty(&rxe_dev_list)) {569569+ struct rxe_dev *rxe =570570+ list_first_entry(&rxe_dev_list, struct rxe_dev, list);571571+572572+ list_del(&rxe->list);573573+ spin_unlock_bh(&dev_list_lock);574574+ rxe_remove(rxe);575575+ spin_lock_bh(&dev_list_lock);576576+ }577577+ spin_unlock_bh(&dev_list_lock);578578+}579579+EXPORT_SYMBOL(rxe_remove_all);580580+581581+static void rxe_port_event(struct rxe_dev *rxe,582582+ enum ib_event_type event)583583+{584584+ struct ib_event ev;585585+586586+ ev.device = &rxe->ib_dev;587587+ ev.element.port_num = 1;588588+ ev.event = event;589589+590590+ ib_dispatch_event(&ev);591591+}592592+593593+/* Caller must hold net_info_lock */594594+void rxe_port_up(struct rxe_dev *rxe)595595+{596596+ struct rxe_port *port;597597+598598+ port = &rxe->port;599599+ port->attr.state = IB_PORT_ACTIVE;600600+ 
port->attr.phys_state = IB_PHYS_STATE_LINK_UP;601601+602602+ rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);603603+ pr_info("rxe: set %s active\n", rxe->ib_dev.name);604604+ return;605605+}606606+607607+/* Caller must hold net_info_lock */608608+void rxe_port_down(struct rxe_dev *rxe)609609+{610610+ struct rxe_port *port;611611+612612+ port = &rxe->port;613613+ port->attr.state = IB_PORT_DOWN;614614+ port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;615615+616616+ rxe_port_event(rxe, IB_EVENT_PORT_ERR);617617+ pr_info("rxe: set %s down\n", rxe->ib_dev.name);618618+ return;619619+}620620+621621+static int rxe_notify(struct notifier_block *not_blk,622622+ unsigned long event,623623+ void *arg)624624+{625625+ struct net_device *ndev = netdev_notifier_info_to_dev(arg);626626+ struct rxe_dev *rxe = net_to_rxe(ndev);627627+628628+ if (!rxe)629629+ goto out;630630+631631+ switch (event) {632632+ case NETDEV_UNREGISTER:633633+ list_del(&rxe->list);634634+ rxe_remove(rxe);635635+ break;636636+ case NETDEV_UP:637637+ rxe_port_up(rxe);638638+ break;639639+ case NETDEV_DOWN:640640+ rxe_port_down(rxe);641641+ break;642642+ case NETDEV_CHANGEMTU:643643+ pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);644644+ rxe_set_mtu(rxe, ndev->mtu);645645+ break;646646+ case NETDEV_REBOOT:647647+ case NETDEV_CHANGE:648648+ case NETDEV_GOING_DOWN:649649+ case NETDEV_CHANGEADDR:650650+ case NETDEV_CHANGENAME:651651+ case NETDEV_FEAT_CHANGE:652652+ default:653653+ pr_info("rxe: ignoring netdev event = %ld for %s\n",654654+ event, ndev->name);655655+ break;656656+ }657657+out:658658+ return NOTIFY_OK;659659+}660660+661661+static struct notifier_block rxe_net_notifier = {662662+ .notifier_call = rxe_notify,663663+};664664+665665+int rxe_net_init(void)666666+{667667+ int err;668668+669669+ spin_lock_init(&dev_list_lock);670670+671671+ recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,672672+ htons(ROCE_V2_UDP_DPORT), true);673673+ if (IS_ERR(recv_sockets.sk6)) {674674+ recv_sockets.sk6 = 
NULL;675675+ pr_err("rxe: Failed to create IPv6 UDP tunnel\n");676676+ return -1;677677+ }678678+679679+ recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,680680+ htons(ROCE_V2_UDP_DPORT), false);681681+ if (IS_ERR(recv_sockets.sk4)) {682682+ rxe_release_udp_tunnel(recv_sockets.sk6);683683+ recv_sockets.sk4 = NULL;684684+ recv_sockets.sk6 = NULL;685685+ pr_err("rxe: Failed to create IPv4 UDP tunnel\n");686686+ return -1;687687+ }688688+689689+ err = register_netdevice_notifier(&rxe_net_notifier);690690+ if (err) {691691+ rxe_release_udp_tunnel(recv_sockets.sk6);692692+ rxe_release_udp_tunnel(recv_sockets.sk4);693693+ pr_err("rxe: Failed to rigister netdev notifier\n");694694+ }695695+696696+ return err;697697+}698698+699699+void rxe_net_exit(void)700700+{701701+ if (recv_sockets.sk6)702702+ rxe_release_udp_tunnel(recv_sockets.sk6);703703+704704+ if (recv_sockets.sk4)705705+ rxe_release_udp_tunnel(recv_sockets.sk4);706706+707707+ unregister_netdevice_notifier(&rxe_net_notifier);708708+}
/* ==== end of drivers/infiniband/sw/rxe/rxe_net.c ====
 * next file in patch: drivers/infiniband/sw/rxe/rxe_net.h (+53 lines)
 */
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_NET_H3535+#define RXE_NET_H3636+3737+#include <net/sock.h>3838+#include <net/if_inet6.h>3939+#include <linux/module.h>4040+4141+struct rxe_recv_sockets {4242+ struct socket *sk4;4343+ struct socket *sk6;4444+};4545+4646+extern struct rxe_recv_sockets recv_sockets;4747+4848+struct rxe_dev *rxe_net_add(struct net_device *ndev);4949+5050+int rxe_net_init(void);5151+void rxe_net_exit(void);5252+5353+#endif /* RXE_NET_H */
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_OPCODE_H3535+#define RXE_OPCODE_H3636+3737+/*3838+ * contains header bit mask definitions and header lengths3939+ * declaration of the rxe_opcode_info struct and4040+ * rxe_wr_opcode_info struct4141+ */4242+4343+enum rxe_wr_mask {4444+ WR_INLINE_MASK = BIT(0),4545+ WR_ATOMIC_MASK = BIT(1),4646+ WR_SEND_MASK = BIT(2),4747+ WR_READ_MASK = BIT(3),4848+ WR_WRITE_MASK = BIT(4),4949+ WR_LOCAL_MASK = BIT(5),5050+ WR_REG_MASK = BIT(6),5151+5252+ WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,5353+ WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,5454+ WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,5555+ WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK,5656+};5757+5858+#define WR_MAX_QPT (8)5959+6060+struct rxe_wr_opcode_info {6161+ char *name;6262+ enum rxe_wr_mask mask[WR_MAX_QPT];6363+};6464+6565+extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];6666+6767+enum rxe_hdr_type {6868+ RXE_LRH,6969+ RXE_GRH,7070+ RXE_BTH,7171+ RXE_RETH,7272+ RXE_AETH,7373+ RXE_ATMETH,7474+ RXE_ATMACK,7575+ RXE_IETH,7676+ RXE_RDETH,7777+ RXE_DETH,7878+ RXE_IMMDT,7979+ RXE_PAYLOAD,8080+ NUM_HDR_TYPES8181+};8282+8383+enum rxe_hdr_mask {8484+ RXE_LRH_MASK = BIT(RXE_LRH),8585+ RXE_GRH_MASK = BIT(RXE_GRH),8686+ RXE_BTH_MASK = BIT(RXE_BTH),8787+ RXE_IMMDT_MASK = BIT(RXE_IMMDT),8888+ RXE_RETH_MASK = BIT(RXE_RETH),8989+ RXE_AETH_MASK = BIT(RXE_AETH),9090+ RXE_ATMETH_MASK = BIT(RXE_ATMETH),9191+ RXE_ATMACK_MASK = BIT(RXE_ATMACK),9292+ RXE_IETH_MASK = BIT(RXE_IETH),9393+ RXE_RDETH_MASK = BIT(RXE_RDETH),9494+ RXE_DETH_MASK = BIT(RXE_DETH),9595+ RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),9696+9797+ RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),9898+ RXE_ACK_MASK = 
BIT(NUM_HDR_TYPES + 1),9999+ RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2),100100+ RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),101101+ RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),102102+ RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),103103+104104+ RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),105105+ RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),106106+107107+ RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),108108+ RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),109109+ RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),110110+111111+ RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),112112+113113+ RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK),114114+ RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK),115115+};116116+117117+#define OPCODE_NONE (-1)118118+#define RXE_NUM_OPCODE 256119119+120120+struct rxe_opcode_info {121121+ char *name;122122+ enum rxe_hdr_mask mask;123123+ int length;124124+ int offset[NUM_HDR_TYPES];125125+};126126+127127+extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE];128128+129129+#endif /* RXE_OPCODE_H */
/* ==== end of drivers/infiniband/sw/rxe/rxe_opcode.h ====
 * next file in patch: drivers/infiniband/sw/rxe/rxe_param.h (+172 lines)
 */
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_PARAM_H3535+#define RXE_PARAM_H3636+3737+static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)3838+{3939+ if (mtu < 256)4040+ return 0;4141+ else if (mtu < 512)4242+ return IB_MTU_256;4343+ else if (mtu < 1024)4444+ return IB_MTU_512;4545+ else if (mtu < 2048)4646+ return IB_MTU_1024;4747+ else if (mtu < 4096)4848+ return IB_MTU_2048;4949+ else5050+ return IB_MTU_4096;5151+}5252+5353+/* Find the IB mtu for a given network MTU. */5454+static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)5555+{5656+ mtu -= RXE_MAX_HDR_LENGTH;5757+5858+ return rxe_mtu_int_to_enum(mtu);5959+}6060+6161+/* default/initial rxe device parameter settings */6262+enum rxe_device_param {6363+ RXE_FW_VER = 0,6464+ RXE_MAX_MR_SIZE = -1ull,6565+ RXE_PAGE_SIZE_CAP = 0xfffff000,6666+ RXE_VENDOR_ID = 0,6767+ RXE_VENDOR_PART_ID = 0,6868+ RXE_HW_VER = 0,6969+ RXE_MAX_QP = 0x10000,7070+ RXE_MAX_QP_WR = 0x4000,7171+ RXE_MAX_INLINE_DATA = 400,7272+ RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR7373+ | IB_DEVICE_BAD_QKEY_CNTR7474+ | IB_DEVICE_AUTO_PATH_MIG7575+ | IB_DEVICE_CHANGE_PHY_PORT7676+ | IB_DEVICE_UD_AV_PORT_ENFORCE7777+ | IB_DEVICE_PORT_ACTIVE_EVENT7878+ | IB_DEVICE_SYS_IMAGE_GUID7979+ | IB_DEVICE_RC_RNR_NAK_GEN8080+ | IB_DEVICE_SRQ_RESIZE8181+ | IB_DEVICE_MEM_MGT_EXTENSIONS,8282+ RXE_MAX_SGE = 32,8383+ RXE_MAX_SGE_RD = 32,8484+ RXE_MAX_CQ = 16384,8585+ RXE_MAX_LOG_CQE = 13,8686+ RXE_MAX_MR = 2 * 1024,8787+ RXE_MAX_PD = 0x7ffc,8888+ RXE_MAX_QP_RD_ATOM = 128,8989+ RXE_MAX_EE_RD_ATOM = 0,9090+ RXE_MAX_RES_RD_ATOM = 0x3f000,9191+ RXE_MAX_QP_INIT_RD_ATOM = 128,9292+ RXE_MAX_EE_INIT_RD_ATOM = 0,9393+ RXE_ATOMIC_CAP = 1,9494+ RXE_MAX_EE = 0,9595+ RXE_MAX_RDD = 0,9696+ RXE_MAX_MW = 0,9797+ 
RXE_MAX_RAW_IPV6_QP = 0,9898+ RXE_MAX_RAW_ETHY_QP = 0,9999+ RXE_MAX_MCAST_GRP = 8192,100100+ RXE_MAX_MCAST_QP_ATTACH = 56,101101+ RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,102102+ RXE_MAX_AH = 100,103103+ RXE_MAX_FMR = 0,104104+ RXE_MAX_MAP_PER_FMR = 0,105105+ RXE_MAX_SRQ = 960,106106+ RXE_MAX_SRQ_WR = 0x4000,107107+ RXE_MIN_SRQ_WR = 1,108108+ RXE_MAX_SRQ_SGE = 27,109109+ RXE_MIN_SRQ_SGE = 1,110110+ RXE_MAX_FMR_PAGE_LIST_LEN = 512,111111+ RXE_MAX_PKEYS = 64,112112+ RXE_LOCAL_CA_ACK_DELAY = 15,113113+114114+ RXE_MAX_UCONTEXT = 512,115115+116116+ RXE_NUM_PORT = 1,117117+ RXE_NUM_COMP_VECTORS = 1,118118+119119+ RXE_MIN_QP_INDEX = 16,120120+ RXE_MAX_QP_INDEX = 0x00020000,121121+122122+ RXE_MIN_SRQ_INDEX = 0x00020001,123123+ RXE_MAX_SRQ_INDEX = 0x00040000,124124+125125+ RXE_MIN_MR_INDEX = 0x00000001,126126+ RXE_MAX_MR_INDEX = 0x00040000,127127+ RXE_MIN_MW_INDEX = 0x00040001,128128+ RXE_MAX_MW_INDEX = 0x00060000,129129+ RXE_MAX_PKT_PER_ACK = 64,130130+131131+ RXE_MAX_UNACKED_PSNS = 128,132132+133133+ /* Max inflight SKBs per queue pair */134134+ RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,135135+ RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,136136+137137+ /* Delay before calling arbiter timer */138138+ RXE_NSEC_ARB_TIMER_DELAY = 200,139139+};140140+141141+/* default/initial rxe port parameters */142142+enum rxe_port_param {143143+ RXE_PORT_STATE = IB_PORT_DOWN,144144+ RXE_PORT_MAX_MTU = IB_MTU_4096,145145+ RXE_PORT_ACTIVE_MTU = IB_MTU_256,146146+ RXE_PORT_GID_TBL_LEN = 1024,147147+ RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,148148+ RXE_PORT_MAX_MSG_SZ = 0x800000,149149+ RXE_PORT_BAD_PKEY_CNTR = 0,150150+ RXE_PORT_QKEY_VIOL_CNTR = 0,151151+ RXE_PORT_LID = 0,152152+ RXE_PORT_SM_LID = 0,153153+ RXE_PORT_SM_SL = 0,154154+ RXE_PORT_LMC = 0,155155+ RXE_PORT_MAX_VL_NUM = 1,156156+ RXE_PORT_SUBNET_TIMEOUT = 0,157157+ RXE_PORT_INIT_TYPE_REPLY = 0,158158+ RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X,159159+ RXE_PORT_ACTIVE_SPEED = 1,160160+ RXE_PORT_PKEY_TBL_LEN = 64,161161+ RXE_PORT_PHYS_STATE = 
2,162162+ RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL,163163+};164164+165165+/* default/initial port info parameters */166166+enum rxe_port_info_param {167167+ RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */168168+ RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */169169+ RXE_PORT_INFO_OPER_VL = 1, /* 1 */170170+};171171+172172+#endif /* RXE_PARAM_H */
/* ==== end of drivers/infiniband/sw/rxe/rxe_param.h ====
 * next file in patch: drivers/infiniband/sw/rxe/rxe_pool.c (+502 lines)
 */
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+3737+/* info about object pools3838+ * note that mr and mw share a single index space3939+ * so that one can map an lkey to the correct type of object4040+ */4141+struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {4242+ [RXE_TYPE_UC] = {4343+ .name = "rxe-uc",4444+ .size = sizeof(struct rxe_ucontext),4545+ },4646+ [RXE_TYPE_PD] = {4747+ .name = "rxe-pd",4848+ .size = sizeof(struct rxe_pd),4949+ },5050+ [RXE_TYPE_AH] = {5151+ .name = "rxe-ah",5252+ .size = sizeof(struct rxe_ah),5353+ .flags = RXE_POOL_ATOMIC,5454+ },5555+ [RXE_TYPE_SRQ] = {5656+ .name = "rxe-srq",5757+ .size = sizeof(struct rxe_srq),5858+ .flags = RXE_POOL_INDEX,5959+ .min_index = RXE_MIN_SRQ_INDEX,6060+ .max_index = RXE_MAX_SRQ_INDEX,6161+ },6262+ [RXE_TYPE_QP] = {6363+ .name = "rxe-qp",6464+ .size = sizeof(struct rxe_qp),6565+ .cleanup = rxe_qp_cleanup,6666+ .flags = RXE_POOL_INDEX,6767+ .min_index = RXE_MIN_QP_INDEX,6868+ .max_index = RXE_MAX_QP_INDEX,6969+ },7070+ [RXE_TYPE_CQ] = {7171+ .name = "rxe-cq",7272+ .size = sizeof(struct rxe_cq),7373+ .cleanup = rxe_cq_cleanup,7474+ },7575+ [RXE_TYPE_MR] = {7676+ .name = "rxe-mr",7777+ .size = sizeof(struct rxe_mem),7878+ .cleanup = rxe_mem_cleanup,7979+ .flags = RXE_POOL_INDEX,8080+ .max_index = RXE_MAX_MR_INDEX,8181+ .min_index = RXE_MIN_MR_INDEX,8282+ },8383+ [RXE_TYPE_MW] = {8484+ .name = "rxe-mw",8585+ .size = sizeof(struct rxe_mem),8686+ .flags = RXE_POOL_INDEX,8787+ .max_index = RXE_MAX_MW_INDEX,8888+ .min_index = RXE_MIN_MW_INDEX,8989+ },9090+ [RXE_TYPE_MC_GRP] = {9191+ .name = "rxe-mc_grp",9292+ .size = sizeof(struct rxe_mc_grp),9393+ .cleanup = rxe_mc_cleanup,9494+ .flags = RXE_POOL_KEY,9595+ 
.key_offset = offsetof(struct rxe_mc_grp, mgid),9696+ .key_size = sizeof(union ib_gid),9797+ },9898+ [RXE_TYPE_MC_ELEM] = {9999+ .name = "rxe-mc_elem",100100+ .size = sizeof(struct rxe_mc_elem),101101+ .flags = RXE_POOL_ATOMIC,102102+ },103103+};104104+105105+static inline char *pool_name(struct rxe_pool *pool)106106+{107107+ return rxe_type_info[pool->type].name;108108+}109109+110110+static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)111111+{112112+ return rxe_type_info[pool->type].cache;113113+}114114+115115+static inline enum rxe_elem_type rxe_type(void *arg)116116+{117117+ struct rxe_pool_entry *elem = arg;118118+119119+ return elem->pool->type;120120+}121121+122122+int rxe_cache_init(void)123123+{124124+ int err;125125+ int i;126126+ size_t size;127127+ struct rxe_type_info *type;128128+129129+ for (i = 0; i < RXE_NUM_TYPES; i++) {130130+ type = &rxe_type_info[i];131131+ size = ALIGN(type->size, RXE_POOL_ALIGN);132132+ type->cache = kmem_cache_create(type->name, size,133133+ RXE_POOL_ALIGN,134134+ RXE_POOL_CACHE_FLAGS, NULL);135135+ if (!type->cache) {136136+ pr_err("Unable to init kmem cache for %s\n",137137+ type->name);138138+ err = -ENOMEM;139139+ goto err1;140140+ }141141+ }142142+143143+ return 0;144144+145145+err1:146146+ while (--i >= 0) {147147+ kmem_cache_destroy(type->cache);148148+ type->cache = NULL;149149+ }150150+151151+ return err;152152+}153153+154154+void rxe_cache_exit(void)155155+{156156+ int i;157157+ struct rxe_type_info *type;158158+159159+ for (i = 0; i < RXE_NUM_TYPES; i++) {160160+ type = &rxe_type_info[i];161161+ kmem_cache_destroy(type->cache);162162+ type->cache = NULL;163163+ }164164+}165165+166166+static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)167167+{168168+ int err = 0;169169+ size_t size;170170+171171+ if ((max - min + 1) < pool->max_elem) {172172+ pr_warn("not enough indices for max_elem\n");173173+ err = -EINVAL;174174+ goto out;175175+ }176176+177177+ pool->max_index = max;178178+ 
pool->min_index = min;179179+180180+ size = BITS_TO_LONGS(max - min + 1) * sizeof(long);181181+ pool->table = kmalloc(size, GFP_KERNEL);182182+ if (!pool->table) {183183+ pr_warn("no memory for bit table\n");184184+ err = -ENOMEM;185185+ goto out;186186+ }187187+188188+ pool->table_size = size;189189+ bitmap_zero(pool->table, max - min + 1);190190+191191+out:192192+ return err;193193+}194194+195195+int rxe_pool_init(196196+ struct rxe_dev *rxe,197197+ struct rxe_pool *pool,198198+ enum rxe_elem_type type,199199+ unsigned max_elem)200200+{201201+ int err = 0;202202+ size_t size = rxe_type_info[type].size;203203+204204+ memset(pool, 0, sizeof(*pool));205205+206206+ pool->rxe = rxe;207207+ pool->type = type;208208+ pool->max_elem = max_elem;209209+ pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);210210+ pool->flags = rxe_type_info[type].flags;211211+ pool->tree = RB_ROOT;212212+ pool->cleanup = rxe_type_info[type].cleanup;213213+214214+ atomic_set(&pool->num_elem, 0);215215+216216+ kref_init(&pool->ref_cnt);217217+218218+ spin_lock_init(&pool->pool_lock);219219+220220+ if (rxe_type_info[type].flags & RXE_POOL_INDEX) {221221+ err = rxe_pool_init_index(pool,222222+ rxe_type_info[type].max_index,223223+ rxe_type_info[type].min_index);224224+ if (err)225225+ goto out;226226+ }227227+228228+ if (rxe_type_info[type].flags & RXE_POOL_KEY) {229229+ pool->key_offset = rxe_type_info[type].key_offset;230230+ pool->key_size = rxe_type_info[type].key_size;231231+ }232232+233233+ pool->state = rxe_pool_valid;234234+235235+out:236236+ return err;237237+}238238+239239+static void rxe_pool_release(struct kref *kref)240240+{241241+ struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);242242+243243+ pool->state = rxe_pool_invalid;244244+ kfree(pool->table);245245+}246246+247247+static void rxe_pool_put(struct rxe_pool *pool)248248+{249249+ kref_put(&pool->ref_cnt, rxe_pool_release);250250+}251251+252252+int rxe_pool_cleanup(struct rxe_pool *pool)253253+{254254+ unsigned 
long flags;255255+256256+ spin_lock_irqsave(&pool->pool_lock, flags);257257+ pool->state = rxe_pool_invalid;258258+ if (atomic_read(&pool->num_elem) > 0)259259+ pr_warn("%s pool destroyed with unfree'd elem\n",260260+ pool_name(pool));261261+ spin_unlock_irqrestore(&pool->pool_lock, flags);262262+263263+ rxe_pool_put(pool);264264+265265+ return 0;266266+}267267+268268+static u32 alloc_index(struct rxe_pool *pool)269269+{270270+ u32 index;271271+ u32 range = pool->max_index - pool->min_index + 1;272272+273273+ index = find_next_zero_bit(pool->table, range, pool->last);274274+ if (index >= range)275275+ index = find_first_zero_bit(pool->table, range);276276+277277+ set_bit(index, pool->table);278278+ pool->last = index;279279+ return index + pool->min_index;280280+}281281+282282+static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)283283+{284284+ struct rb_node **link = &pool->tree.rb_node;285285+ struct rb_node *parent = NULL;286286+ struct rxe_pool_entry *elem;287287+288288+ while (*link) {289289+ parent = *link;290290+ elem = rb_entry(parent, struct rxe_pool_entry, node);291291+292292+ if (elem->index == new->index) {293293+ pr_warn("element already exists!\n");294294+ goto out;295295+ }296296+297297+ if (elem->index > new->index)298298+ link = &(*link)->rb_left;299299+ else300300+ link = &(*link)->rb_right;301301+ }302302+303303+ rb_link_node(&new->node, parent, link);304304+ rb_insert_color(&new->node, &pool->tree);305305+out:306306+ return;307307+}308308+309309+static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)310310+{311311+ struct rb_node **link = &pool->tree.rb_node;312312+ struct rb_node *parent = NULL;313313+ struct rxe_pool_entry *elem;314314+ int cmp;315315+316316+ while (*link) {317317+ parent = *link;318318+ elem = rb_entry(parent, struct rxe_pool_entry, node);319319+320320+ cmp = memcmp((u8 *)elem + pool->key_offset,321321+ (u8 *)new + pool->key_offset, pool->key_size);322322+323323+ if (cmp == 0) {324324+ 
pr_warn("key already exists!\n");325325+ goto out;326326+ }327327+328328+ if (cmp > 0)329329+ link = &(*link)->rb_left;330330+ else331331+ link = &(*link)->rb_right;332332+ }333333+334334+ rb_link_node(&new->node, parent, link);335335+ rb_insert_color(&new->node, &pool->tree);336336+out:337337+ return;338338+}339339+340340+void rxe_add_key(void *arg, void *key)341341+{342342+ struct rxe_pool_entry *elem = arg;343343+ struct rxe_pool *pool = elem->pool;344344+ unsigned long flags;345345+346346+ spin_lock_irqsave(&pool->pool_lock, flags);347347+ memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);348348+ insert_key(pool, elem);349349+ spin_unlock_irqrestore(&pool->pool_lock, flags);350350+}351351+352352+void rxe_drop_key(void *arg)353353+{354354+ struct rxe_pool_entry *elem = arg;355355+ struct rxe_pool *pool = elem->pool;356356+ unsigned long flags;357357+358358+ spin_lock_irqsave(&pool->pool_lock, flags);359359+ rb_erase(&elem->node, &pool->tree);360360+ spin_unlock_irqrestore(&pool->pool_lock, flags);361361+}362362+363363+void rxe_add_index(void *arg)364364+{365365+ struct rxe_pool_entry *elem = arg;366366+ struct rxe_pool *pool = elem->pool;367367+ unsigned long flags;368368+369369+ spin_lock_irqsave(&pool->pool_lock, flags);370370+ elem->index = alloc_index(pool);371371+ insert_index(pool, elem);372372+ spin_unlock_irqrestore(&pool->pool_lock, flags);373373+}374374+375375+void rxe_drop_index(void *arg)376376+{377377+ struct rxe_pool_entry *elem = arg;378378+ struct rxe_pool *pool = elem->pool;379379+ unsigned long flags;380380+381381+ spin_lock_irqsave(&pool->pool_lock, flags);382382+ clear_bit(elem->index - pool->min_index, pool->table);383383+ rb_erase(&elem->node, &pool->tree);384384+ spin_unlock_irqrestore(&pool->pool_lock, flags);385385+}386386+387387+void *rxe_alloc(struct rxe_pool *pool)388388+{389389+ struct rxe_pool_entry *elem;390390+ unsigned long flags;391391+392392+ might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));393393+394394+ 
spin_lock_irqsave(&pool->pool_lock, flags);395395+ if (pool->state != rxe_pool_valid) {396396+ spin_unlock_irqrestore(&pool->pool_lock, flags);397397+ return NULL;398398+ }399399+ kref_get(&pool->ref_cnt);400400+ spin_unlock_irqrestore(&pool->pool_lock, flags);401401+402402+ kref_get(&pool->rxe->ref_cnt);403403+404404+ if (atomic_inc_return(&pool->num_elem) > pool->max_elem) {405405+ atomic_dec(&pool->num_elem);406406+ rxe_dev_put(pool->rxe);407407+ rxe_pool_put(pool);408408+ return NULL;409409+ }410410+411411+ elem = kmem_cache_zalloc(pool_cache(pool),412412+ (pool->flags & RXE_POOL_ATOMIC) ?413413+ GFP_ATOMIC : GFP_KERNEL);414414+415415+ elem->pool = pool;416416+ kref_init(&elem->ref_cnt);417417+418418+ return elem;419419+}420420+421421+void rxe_elem_release(struct kref *kref)422422+{423423+ struct rxe_pool_entry *elem =424424+ container_of(kref, struct rxe_pool_entry, ref_cnt);425425+ struct rxe_pool *pool = elem->pool;426426+427427+ if (pool->cleanup)428428+ pool->cleanup(elem);429429+430430+ kmem_cache_free(pool_cache(pool), elem);431431+ atomic_dec(&pool->num_elem);432432+ rxe_dev_put(pool->rxe);433433+ rxe_pool_put(pool);434434+}435435+436436+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)437437+{438438+ struct rb_node *node = NULL;439439+ struct rxe_pool_entry *elem = NULL;440440+ unsigned long flags;441441+442442+ spin_lock_irqsave(&pool->pool_lock, flags);443443+444444+ if (pool->state != rxe_pool_valid)445445+ goto out;446446+447447+ node = pool->tree.rb_node;448448+449449+ while (node) {450450+ elem = rb_entry(node, struct rxe_pool_entry, node);451451+452452+ if (elem->index > index)453453+ node = node->rb_left;454454+ else if (elem->index < index)455455+ node = node->rb_right;456456+ else457457+ break;458458+ }459459+460460+ if (node)461461+ kref_get(&elem->ref_cnt);462462+463463+out:464464+ spin_unlock_irqrestore(&pool->pool_lock, flags);465465+ return node ? 
(void *)elem : NULL;466466+}467467+468468+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)469469+{470470+ struct rb_node *node = NULL;471471+ struct rxe_pool_entry *elem = NULL;472472+ int cmp;473473+ unsigned long flags;474474+475475+ spin_lock_irqsave(&pool->pool_lock, flags);476476+477477+ if (pool->state != rxe_pool_valid)478478+ goto out;479479+480480+ node = pool->tree.rb_node;481481+482482+ while (node) {483483+ elem = rb_entry(node, struct rxe_pool_entry, node);484484+485485+ cmp = memcmp((u8 *)elem + pool->key_offset,486486+ key, pool->key_size);487487+488488+ if (cmp > 0)489489+ node = node->rb_left;490490+ else if (cmp < 0)491491+ node = node->rb_right;492492+ else493493+ break;494494+ }495495+496496+ if (node)497497+ kref_get(&elem->ref_cnt);498498+499499+out:500500+ spin_unlock_irqrestore(&pool->pool_lock, flags);501501+ return node ? ((void *)elem) : NULL;502502+}
+163
drivers/infiniband/sw/rxe/rxe_pool.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_POOL_H3535+#define RXE_POOL_H3636+3737+#define RXE_POOL_ALIGN (16)3838+#define RXE_POOL_CACHE_FLAGS (0)3939+4040+enum rxe_pool_flags {4141+ RXE_POOL_ATOMIC = BIT(0),4242+ RXE_POOL_INDEX = BIT(1),4343+ RXE_POOL_KEY = BIT(2),4444+};4545+4646+enum rxe_elem_type {4747+ RXE_TYPE_UC,4848+ RXE_TYPE_PD,4949+ RXE_TYPE_AH,5050+ RXE_TYPE_SRQ,5151+ RXE_TYPE_QP,5252+ RXE_TYPE_CQ,5353+ RXE_TYPE_MR,5454+ RXE_TYPE_MW,5555+ RXE_TYPE_MC_GRP,5656+ RXE_TYPE_MC_ELEM,5757+ RXE_NUM_TYPES, /* keep me last */5858+};5959+6060+struct rxe_type_info {6161+ char *name;6262+ size_t size;6363+ void (*cleanup)(void *obj);6464+ enum rxe_pool_flags flags;6565+ u32 max_index;6666+ u32 min_index;6767+ size_t key_offset;6868+ size_t key_size;6969+ struct kmem_cache *cache;7070+};7171+7272+extern struct rxe_type_info rxe_type_info[];7373+7474+enum rxe_pool_state {7575+ rxe_pool_invalid,7676+ rxe_pool_valid,7777+};7878+7979+struct rxe_pool_entry {8080+ struct rxe_pool *pool;8181+ struct kref ref_cnt;8282+ struct list_head list;8383+8484+ /* only used if indexed or keyed */8585+ struct rb_node node;8686+ u32 index;8787+};8888+8989+struct rxe_pool {9090+ struct rxe_dev *rxe;9191+ spinlock_t pool_lock; /* pool spinlock */9292+ size_t elem_size;9393+ struct kref ref_cnt;9494+ void (*cleanup)(void *obj);9595+ enum rxe_pool_state state;9696+ enum rxe_pool_flags flags;9797+ enum rxe_elem_type type;9898+9999+ unsigned int max_elem;100100+ atomic_t num_elem;101101+102102+ /* only used if indexed or keyed */103103+ struct rb_root tree;104104+ unsigned long *table;105105+ size_t table_size;106106+ u32 max_index;107107+ u32 min_index;108108+ u32 last;109109+ size_t key_offset;110110+ size_t 
key_size;111111+};112112+113113+/* initialize slab caches for managed objects */114114+int rxe_cache_init(void);115115+116116+/* cleanup slab caches for managed objects */117117+void rxe_cache_exit(void);118118+119119+/* initialize a pool of objects with given limit on120120+ * number of elements. gets parameters from rxe_type_info121121+ * pool elements will be allocated out of a slab cache122122+ */123123+int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,124124+ enum rxe_elem_type type, u32 max_elem);125125+126126+/* free resources from object pool */127127+int rxe_pool_cleanup(struct rxe_pool *pool);128128+129129+/* allocate an object from pool */130130+void *rxe_alloc(struct rxe_pool *pool);131131+132132+/* assign an index to an indexed object and insert object into133133+ * pool's rb tree134134+ */135135+void rxe_add_index(void *elem);136136+137137+/* drop an index and remove object from rb tree */138138+void rxe_drop_index(void *elem);139139+140140+/* assign a key to a keyed object and insert object into141141+ * pool's rb tree142142+ */143143+void rxe_add_key(void *elem, void *key);144144+145145+/* remove elem from rb tree */146146+void rxe_drop_key(void *elem);147147+148148+/* lookup an indexed object from index. takes a reference on object */149149+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);150150+151151+/* lookup keyed object from key. takes a reference on the object */152152+void *rxe_pool_get_key(struct rxe_pool *pool, void *key);153153+154154+/* cleanup an object when all references are dropped */155155+void rxe_elem_release(struct kref *kref);156156+157157+/* take a reference on an object */158158+#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)159159+160160+/* drop a reference on an object */161161+#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)162162+163163+#endif /* RXE_POOL_H */
+851
drivers/infiniband/sw/rxe/rxe_qp.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include <linux/skbuff.h>3535+#include <linux/delay.h>3636+#include <linux/sched.h>3737+3838+#include "rxe.h"3939+#include "rxe_loc.h"4040+#include "rxe_queue.h"4141+#include "rxe_task.h"4242+4343+char *rxe_qp_state_name[] = {4444+ [QP_STATE_RESET] = "RESET",4545+ [QP_STATE_INIT] = "INIT",4646+ [QP_STATE_READY] = "READY",4747+ [QP_STATE_DRAIN] = "DRAIN",4848+ [QP_STATE_DRAINED] = "DRAINED",4949+ [QP_STATE_ERROR] = "ERROR",5050+};5151+5252+static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,5353+ int has_srq)5454+{5555+ if (cap->max_send_wr > rxe->attr.max_qp_wr) {5656+ pr_warn("invalid send wr = %d > %d\n",5757+ cap->max_send_wr, rxe->attr.max_qp_wr);5858+ goto err1;5959+ }6060+6161+ if (cap->max_send_sge > rxe->attr.max_sge) {6262+ pr_warn("invalid send sge = %d > %d\n",6363+ cap->max_send_sge, rxe->attr.max_sge);6464+ goto err1;6565+ }6666+6767+ if (!has_srq) {6868+ if (cap->max_recv_wr > rxe->attr.max_qp_wr) {6969+ pr_warn("invalid recv wr = %d > %d\n",7070+ cap->max_recv_wr, rxe->attr.max_qp_wr);7171+ goto err1;7272+ }7373+7474+ if (cap->max_recv_sge > rxe->attr.max_sge) {7575+ pr_warn("invalid recv sge = %d > %d\n",7676+ cap->max_recv_sge, rxe->attr.max_sge);7777+ goto err1;7878+ }7979+ }8080+8181+ if (cap->max_inline_data > rxe->max_inline_data) {8282+ pr_warn("invalid max inline data = %d > %d\n",8383+ cap->max_inline_data, rxe->max_inline_data);8484+ goto err1;8585+ }8686+8787+ return 0;8888+8989+err1:9090+ return -EINVAL;9191+}9292+9393+int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)9494+{9595+ struct ib_qp_cap *cap = &init->cap;9696+ struct rxe_port *port;9797+ int port_num = init->port_num;9898+9999+ if 
(!init->recv_cq || !init->send_cq) {100100+ pr_warn("missing cq\n");101101+ goto err1;102102+ }103103+104104+ if (rxe_qp_chk_cap(rxe, cap, !!init->srq))105105+ goto err1;106106+107107+ if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {108108+ if (port_num != 1) {109109+ pr_warn("invalid port = %d\n", port_num);110110+ goto err1;111111+ }112112+113113+ port = &rxe->port;114114+115115+ if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {116116+ pr_warn("SMI QP exists for port %d\n", port_num);117117+ goto err1;118118+ }119119+120120+ if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {121121+ pr_warn("GSI QP exists for port %d\n", port_num);122122+ goto err1;123123+ }124124+ }125125+126126+ return 0;127127+128128+err1:129129+ return -EINVAL;130130+}131131+132132+static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n)133133+{134134+ qp->resp.res_head = 0;135135+ qp->resp.res_tail = 0;136136+ qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL);137137+138138+ if (!qp->resp.resources)139139+ return -ENOMEM;140140+141141+ return 0;142142+}143143+144144+static void free_rd_atomic_resources(struct rxe_qp *qp)145145+{146146+ if (qp->resp.resources) {147147+ int i;148148+149149+ for (i = 0; i < qp->attr.max_rd_atomic; i++) {150150+ struct resp_res *res = &qp->resp.resources[i];151151+152152+ free_rd_atomic_resource(qp, res);153153+ }154154+ kfree(qp->resp.resources);155155+ qp->resp.resources = NULL;156156+ }157157+}158158+159159+void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)160160+{161161+ if (res->type == RXE_ATOMIC_MASK) {162162+ rxe_drop_ref(qp);163163+ kfree_skb(res->atomic.skb);164164+ } else if (res->type == RXE_READ_MASK) {165165+ if (res->read.mr)166166+ rxe_drop_ref(res->read.mr);167167+ }168168+ res->type = 0;169169+}170170+171171+static void cleanup_rd_atomic_resources(struct rxe_qp *qp)172172+{173173+ int i;174174+ struct resp_res *res;175175+176176+ if (qp->resp.resources) {177177+ 
for (i = 0; i < qp->attr.max_rd_atomic; i++) {178178+ res = &qp->resp.resources[i];179179+ free_rd_atomic_resource(qp, res);180180+ }181181+ }182182+}183183+184184+static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,185185+ struct ib_qp_init_attr *init)186186+{187187+ struct rxe_port *port;188188+ u32 qpn;189189+190190+ qp->sq_sig_type = init->sq_sig_type;191191+ qp->attr.path_mtu = 1;192192+ qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);193193+194194+ qpn = qp->pelem.index;195195+ port = &rxe->port;196196+197197+ switch (init->qp_type) {198198+ case IB_QPT_SMI:199199+ qp->ibqp.qp_num = 0;200200+ port->qp_smi_index = qpn;201201+ qp->attr.port_num = init->port_num;202202+ break;203203+204204+ case IB_QPT_GSI:205205+ qp->ibqp.qp_num = 1;206206+ port->qp_gsi_index = qpn;207207+ qp->attr.port_num = init->port_num;208208+ break;209209+210210+ default:211211+ qp->ibqp.qp_num = qpn;212212+ break;213213+ }214214+215215+ INIT_LIST_HEAD(&qp->grp_list);216216+217217+ skb_queue_head_init(&qp->send_pkts);218218+219219+ spin_lock_init(&qp->grp_lock);220220+ spin_lock_init(&qp->state_lock);221221+222222+ atomic_set(&qp->ssn, 0);223223+ atomic_set(&qp->skb_out, 0);224224+}225225+226226+static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,227227+ struct ib_qp_init_attr *init,228228+ struct ib_ucontext *context, struct ib_udata *udata)229229+{230230+ int err;231231+ int wqe_size;232232+233233+ err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);234234+ if (err < 0)235235+ return err;236236+ qp->sk->sk->sk_user_data = qp;237237+238238+ qp->sq.max_wr = init->cap.max_send_wr;239239+ qp->sq.max_sge = init->cap.max_send_sge;240240+ qp->sq.max_inline = init->cap.max_inline_data;241241+242242+ wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +243243+ qp->sq.max_sge * sizeof(struct ib_sge),244244+ sizeof(struct rxe_send_wqe) +245245+ qp->sq.max_inline);246246+247247+ qp->sq.queue = rxe_queue_init(rxe,248248+ &qp->sq.max_wr,249249+ 
wqe_size);250250+ if (!qp->sq.queue)251251+ return -ENOMEM;252252+253253+ err = do_mmap_info(rxe, udata, true,254254+ context, qp->sq.queue->buf,255255+ qp->sq.queue->buf_size, &qp->sq.queue->ip);256256+257257+ if (err) {258258+ kvfree(qp->sq.queue->buf);259259+ kfree(qp->sq.queue);260260+ return err;261261+ }262262+263263+ qp->req.wqe_index = producer_index(qp->sq.queue);264264+ qp->req.state = QP_STATE_RESET;265265+ qp->req.opcode = -1;266266+ qp->comp.opcode = -1;267267+268268+ spin_lock_init(&qp->sq.sq_lock);269269+ skb_queue_head_init(&qp->req_pkts);270270+271271+ rxe_init_task(rxe, &qp->req.task, qp,272272+ rxe_requester, "req");273273+ rxe_init_task(rxe, &qp->comp.task, qp,274274+ rxe_completer, "comp");275275+276276+ init_timer(&qp->rnr_nak_timer);277277+ qp->rnr_nak_timer.function = rnr_nak_timer;278278+ qp->rnr_nak_timer.data = (unsigned long)qp;279279+280280+ init_timer(&qp->retrans_timer);281281+ qp->retrans_timer.function = retransmit_timer;282282+ qp->retrans_timer.data = (unsigned long)qp;283283+ qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */284284+285285+ return 0;286286+}287287+288288+static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,289289+ struct ib_qp_init_attr *init,290290+ struct ib_ucontext *context, struct ib_udata *udata)291291+{292292+ int err;293293+ int wqe_size;294294+295295+ if (!qp->srq) {296296+ qp->rq.max_wr = init->cap.max_recv_wr;297297+ qp->rq.max_sge = init->cap.max_recv_sge;298298+299299+ wqe_size = rcv_wqe_size(qp->rq.max_sge);300300+301301+ pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n",302302+ qp->rq.max_wr, qp->rq.max_sge, wqe_size);303303+304304+ qp->rq.queue = rxe_queue_init(rxe,305305+ &qp->rq.max_wr,306306+ wqe_size);307307+ if (!qp->rq.queue)308308+ return -ENOMEM;309309+310310+ err = do_mmap_info(rxe, udata, false, context,311311+ qp->rq.queue->buf,312312+ qp->rq.queue->buf_size,313313+ &qp->rq.queue->ip);314314+ if (err) {315315+ kvfree(qp->rq.queue->buf);316316+ 
kfree(qp->rq.queue);317317+ return err;318318+ }319319+ }320320+321321+ spin_lock_init(&qp->rq.producer_lock);322322+ spin_lock_init(&qp->rq.consumer_lock);323323+324324+ skb_queue_head_init(&qp->resp_pkts);325325+326326+ rxe_init_task(rxe, &qp->resp.task, qp,327327+ rxe_responder, "resp");328328+329329+ qp->resp.opcode = OPCODE_NONE;330330+ qp->resp.msn = 0;331331+ qp->resp.state = QP_STATE_RESET;332332+333333+ return 0;334334+}335335+336336+/* called by the create qp verb */337337+int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,338338+ struct ib_qp_init_attr *init, struct ib_udata *udata,339339+ struct ib_pd *ibpd)340340+{341341+ int err;342342+ struct rxe_cq *rcq = to_rcq(init->recv_cq);343343+ struct rxe_cq *scq = to_rcq(init->send_cq);344344+ struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;345345+ struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;346346+347347+ rxe_add_ref(pd);348348+ rxe_add_ref(rcq);349349+ rxe_add_ref(scq);350350+ if (srq)351351+ rxe_add_ref(srq);352352+353353+ qp->pd = pd;354354+ qp->rcq = rcq;355355+ qp->scq = scq;356356+ qp->srq = srq;357357+358358+ rxe_qp_init_misc(rxe, qp, init);359359+360360+ err = rxe_qp_init_req(rxe, qp, init, context, udata);361361+ if (err)362362+ goto err1;363363+364364+ err = rxe_qp_init_resp(rxe, qp, init, context, udata);365365+ if (err)366366+ goto err2;367367+368368+ qp->attr.qp_state = IB_QPS_RESET;369369+ qp->valid = 1;370370+371371+ return 0;372372+373373+err2:374374+ rxe_queue_cleanup(qp->sq.queue);375375+err1:376376+ if (srq)377377+ rxe_drop_ref(srq);378378+ rxe_drop_ref(scq);379379+ rxe_drop_ref(rcq);380380+ rxe_drop_ref(pd);381381+382382+ return err;383383+}384384+385385+/* called by the query qp verb */386386+int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)387387+{388388+ init->event_handler = qp->ibqp.event_handler;389389+ init->qp_context = qp->ibqp.qp_context;390390+ init->send_cq = qp->ibqp.send_cq;391391+ 
init->recv_cq = qp->ibqp.recv_cq;392392+ init->srq = qp->ibqp.srq;393393+394394+ init->cap.max_send_wr = qp->sq.max_wr;395395+ init->cap.max_send_sge = qp->sq.max_sge;396396+ init->cap.max_inline_data = qp->sq.max_inline;397397+398398+ if (!qp->srq) {399399+ init->cap.max_recv_wr = qp->rq.max_wr;400400+ init->cap.max_recv_sge = qp->rq.max_sge;401401+ }402402+403403+ init->sq_sig_type = qp->sq_sig_type;404404+405405+ init->qp_type = qp->ibqp.qp_type;406406+ init->port_num = 1;407407+408408+ return 0;409409+}410410+411411+/* called by the modify qp verb, this routine checks all the parameters before412412+ * making any changes413413+ */414414+int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,415415+ struct ib_qp_attr *attr, int mask)416416+{417417+ enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?418418+ attr->cur_qp_state : qp->attr.qp_state;419419+ enum ib_qp_state new_state = (mask & IB_QP_STATE) ?420420+ attr->qp_state : cur_state;421421+422422+ if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,423423+ IB_LINK_LAYER_ETHERNET)) {424424+ pr_warn("invalid mask or state for qp\n");425425+ goto err1;426426+ }427427+428428+ if (mask & IB_QP_STATE) {429429+ if (cur_state == IB_QPS_SQD) {430430+ if (qp->req.state == QP_STATE_DRAIN &&431431+ new_state != IB_QPS_ERR)432432+ goto err1;433433+ }434434+ }435435+436436+ if (mask & IB_QP_PORT) {437437+ if (attr->port_num != 1) {438438+ pr_warn("invalid port %d\n", attr->port_num);439439+ goto err1;440440+ }441441+ }442442+443443+ if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))444444+ goto err1;445445+446446+ if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))447447+ goto err1;448448+449449+ if (mask & IB_QP_ALT_PATH) {450450+ if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))451451+ goto err1;452452+ if (attr->alt_port_num != 1) {453453+ pr_warn("invalid alt port %d\n", attr->alt_port_num);454454+ goto err1;455455+ }456456+ if (attr->alt_timeout > 31) {457457+ 
pr_warn("invalid QP alt timeout %d > 31\n",458458+ attr->alt_timeout);459459+ goto err1;460460+ }461461+ }462462+463463+ if (mask & IB_QP_PATH_MTU) {464464+ struct rxe_port *port = &rxe->port;465465+466466+ enum ib_mtu max_mtu = port->attr.max_mtu;467467+ enum ib_mtu mtu = attr->path_mtu;468468+469469+ if (mtu > max_mtu) {470470+ pr_debug("invalid mtu (%d) > (%d)\n",471471+ ib_mtu_enum_to_int(mtu),472472+ ib_mtu_enum_to_int(max_mtu));473473+ goto err1;474474+ }475475+ }476476+477477+ if (mask & IB_QP_MAX_QP_RD_ATOMIC) {478478+ if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {479479+ pr_warn("invalid max_rd_atomic %d > %d\n",480480+ attr->max_rd_atomic,481481+ rxe->attr.max_qp_rd_atom);482482+ goto err1;483483+ }484484+ }485485+486486+ if (mask & IB_QP_TIMEOUT) {487487+ if (attr->timeout > 31) {488488+ pr_warn("invalid QP timeout %d > 31\n",489489+ attr->timeout);490490+ goto err1;491491+ }492492+ }493493+494494+ return 0;495495+496496+err1:497497+ return -EINVAL;498498+}499499+500500+/* move the qp to the reset state */501501+static void rxe_qp_reset(struct rxe_qp *qp)502502+{503503+ /* stop tasks from running */504504+ rxe_disable_task(&qp->resp.task);505505+506506+ /* stop request/comp */507507+ if (qp->sq.queue) {508508+ if (qp_type(qp) == IB_QPT_RC)509509+ rxe_disable_task(&qp->comp.task);510510+ rxe_disable_task(&qp->req.task);511511+ }512512+513513+ /* move qp to the reset state */514514+ qp->req.state = QP_STATE_RESET;515515+ qp->resp.state = QP_STATE_RESET;516516+517517+ /* let state machines reset themselves drain work and packet queues518518+ * etc.519519+ */520520+ __rxe_do_task(&qp->resp.task);521521+522522+ if (qp->sq.queue) {523523+ __rxe_do_task(&qp->comp.task);524524+ __rxe_do_task(&qp->req.task);525525+ }526526+527527+ /* cleanup attributes */528528+ atomic_set(&qp->ssn, 0);529529+ qp->req.opcode = -1;530530+ qp->req.need_retry = 0;531531+ qp->req.noack_pkts = 0;532532+ qp->resp.msn = 0;533533+ qp->resp.opcode = -1;534534+ qp->resp.drop_msg = 
0;535535+ qp->resp.goto_error = 0;536536+ qp->resp.sent_psn_nak = 0;537537+538538+ if (qp->resp.mr) {539539+ rxe_drop_ref(qp->resp.mr);540540+ qp->resp.mr = NULL;541541+ }542542+543543+ cleanup_rd_atomic_resources(qp);544544+545545+ /* reenable tasks */546546+ rxe_enable_task(&qp->resp.task);547547+548548+ if (qp->sq.queue) {549549+ if (qp_type(qp) == IB_QPT_RC)550550+ rxe_enable_task(&qp->comp.task);551551+552552+ rxe_enable_task(&qp->req.task);553553+ }554554+}555555+556556+/* drain the send queue */557557+static void rxe_qp_drain(struct rxe_qp *qp)558558+{559559+ if (qp->sq.queue) {560560+ if (qp->req.state != QP_STATE_DRAINED) {561561+ qp->req.state = QP_STATE_DRAIN;562562+ if (qp_type(qp) == IB_QPT_RC)563563+ rxe_run_task(&qp->comp.task, 1);564564+ else565565+ __rxe_do_task(&qp->comp.task);566566+ rxe_run_task(&qp->req.task, 1);567567+ }568568+ }569569+}570570+571571+/* move the qp to the error state */572572+void rxe_qp_error(struct rxe_qp *qp)573573+{574574+ qp->req.state = QP_STATE_ERROR;575575+ qp->resp.state = QP_STATE_ERROR;576576+577577+ /* drain work and packet queues */578578+ rxe_run_task(&qp->resp.task, 1);579579+580580+ if (qp_type(qp) == IB_QPT_RC)581581+ rxe_run_task(&qp->comp.task, 1);582582+ else583583+ __rxe_do_task(&qp->comp.task);584584+ rxe_run_task(&qp->req.task, 1);585585+}586586+587587+/* called by the modify qp verb */588588+int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,589589+ struct ib_udata *udata)590590+{591591+ int err;592592+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);593593+ union ib_gid sgid;594594+ struct ib_gid_attr sgid_attr;595595+596596+ if (mask & IB_QP_MAX_QP_RD_ATOMIC) {597597+ int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);598598+599599+ free_rd_atomic_resources(qp);600600+601601+ err = alloc_rd_atomic_resources(qp, max_rd_atomic);602602+ if (err)603603+ return err;604604+605605+ qp->attr.max_rd_atomic = max_rd_atomic;606606+ atomic_set(&qp->req.rd_atomic, 
max_rd_atomic);607607+ }608608+609609+ if (mask & IB_QP_CUR_STATE)610610+ qp->attr.cur_qp_state = attr->qp_state;611611+612612+ if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)613613+ qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;614614+615615+ if (mask & IB_QP_ACCESS_FLAGS)616616+ qp->attr.qp_access_flags = attr->qp_access_flags;617617+618618+ if (mask & IB_QP_PKEY_INDEX)619619+ qp->attr.pkey_index = attr->pkey_index;620620+621621+ if (mask & IB_QP_PORT)622622+ qp->attr.port_num = attr->port_num;623623+624624+ if (mask & IB_QP_QKEY)625625+ qp->attr.qkey = attr->qkey;626626+627627+ if (mask & IB_QP_AV) {628628+ ib_get_cached_gid(&rxe->ib_dev, 1,629629+ attr->ah_attr.grh.sgid_index, &sgid,630630+ &sgid_attr);631631+ rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,632632+ &attr->ah_attr);633633+ rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,634634+ &sgid_attr, &sgid);635635+ if (sgid_attr.ndev)636636+ dev_put(sgid_attr.ndev);637637+ }638638+639639+ if (mask & IB_QP_ALT_PATH) {640640+ ib_get_cached_gid(&rxe->ib_dev, 1,641641+ attr->alt_ah_attr.grh.sgid_index, &sgid,642642+ &sgid_attr);643643+644644+ rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,645645+ &attr->alt_ah_attr);646646+ rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr,647647+ &sgid_attr, &sgid);648648+ if (sgid_attr.ndev)649649+ dev_put(sgid_attr.ndev);650650+651651+ qp->attr.alt_port_num = attr->alt_port_num;652652+ qp->attr.alt_pkey_index = attr->alt_pkey_index;653653+ qp->attr.alt_timeout = attr->alt_timeout;654654+ }655655+656656+ if (mask & IB_QP_PATH_MTU) {657657+ qp->attr.path_mtu = attr->path_mtu;658658+ qp->mtu = ib_mtu_enum_to_int(attr->path_mtu);659659+ }660660+661661+ if (mask & IB_QP_TIMEOUT) {662662+ qp->attr.timeout = attr->timeout;663663+ if (attr->timeout == 0) {664664+ qp->qp_timeout_jiffies = 0;665665+ } else {666666+ /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */667667+ int j = nsecs_to_jiffies(4096ULL << attr->timeout);668668+669669+ 
qp->qp_timeout_jiffies = j ? j : 1;670670+ }671671+ }672672+673673+ if (mask & IB_QP_RETRY_CNT) {674674+ qp->attr.retry_cnt = attr->retry_cnt;675675+ qp->comp.retry_cnt = attr->retry_cnt;676676+ pr_debug("set retry count = %d\n", attr->retry_cnt);677677+ }678678+679679+ if (mask & IB_QP_RNR_RETRY) {680680+ qp->attr.rnr_retry = attr->rnr_retry;681681+ qp->comp.rnr_retry = attr->rnr_retry;682682+ pr_debug("set rnr retry count = %d\n", attr->rnr_retry);683683+ }684684+685685+ if (mask & IB_QP_RQ_PSN) {686686+ qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);687687+ qp->resp.psn = qp->attr.rq_psn;688688+ pr_debug("set resp psn = 0x%x\n", qp->resp.psn);689689+ }690690+691691+ if (mask & IB_QP_MIN_RNR_TIMER) {692692+ qp->attr.min_rnr_timer = attr->min_rnr_timer;693693+ pr_debug("set min rnr timer = 0x%x\n",694694+ attr->min_rnr_timer);695695+ }696696+697697+ if (mask & IB_QP_SQ_PSN) {698698+ qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);699699+ qp->req.psn = qp->attr.sq_psn;700700+ qp->comp.psn = qp->attr.sq_psn;701701+ pr_debug("set req psn = 0x%x\n", qp->req.psn);702702+ }703703+704704+ if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {705705+ qp->attr.max_dest_rd_atomic =706706+ __roundup_pow_of_two(attr->max_dest_rd_atomic);707707+ }708708+709709+ if (mask & IB_QP_PATH_MIG_STATE)710710+ qp->attr.path_mig_state = attr->path_mig_state;711711+712712+ if (mask & IB_QP_DEST_QPN)713713+ qp->attr.dest_qp_num = attr->dest_qp_num;714714+715715+ if (mask & IB_QP_STATE) {716716+ qp->attr.qp_state = attr->qp_state;717717+718718+ switch (attr->qp_state) {719719+ case IB_QPS_RESET:720720+ pr_debug("qp state -> RESET\n");721721+ rxe_qp_reset(qp);722722+ break;723723+724724+ case IB_QPS_INIT:725725+ pr_debug("qp state -> INIT\n");726726+ qp->req.state = QP_STATE_INIT;727727+ qp->resp.state = QP_STATE_INIT;728728+ break;729729+730730+ case IB_QPS_RTR:731731+ pr_debug("qp state -> RTR\n");732732+ qp->resp.state = QP_STATE_READY;733733+ break;734734+735735+ case IB_QPS_RTS:736736+ pr_debug("qp 
state -> RTS\n");737737+ qp->req.state = QP_STATE_READY;738738+ break;739739+740740+ case IB_QPS_SQD:741741+ pr_debug("qp state -> SQD\n");742742+ rxe_qp_drain(qp);743743+ break;744744+745745+ case IB_QPS_SQE:746746+ pr_warn("qp state -> SQE !!?\n");747747+ /* Not possible from modify_qp. */748748+ break;749749+750750+ case IB_QPS_ERR:751751+ pr_debug("qp state -> ERR\n");752752+ rxe_qp_error(qp);753753+ break;754754+ }755755+ }756756+757757+ return 0;758758+}759759+760760+/* called by the query qp verb */761761+int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)762762+{763763+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);764764+765765+ *attr = qp->attr;766766+767767+ attr->rq_psn = qp->resp.psn;768768+ attr->sq_psn = qp->req.psn;769769+770770+ attr->cap.max_send_wr = qp->sq.max_wr;771771+ attr->cap.max_send_sge = qp->sq.max_sge;772772+ attr->cap.max_inline_data = qp->sq.max_inline;773773+774774+ if (!qp->srq) {775775+ attr->cap.max_recv_wr = qp->rq.max_wr;776776+ attr->cap.max_recv_sge = qp->rq.max_sge;777777+ }778778+779779+ rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr);780780+ rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr);781781+782782+ if (qp->req.state == QP_STATE_DRAIN) {783783+ attr->sq_draining = 1;784784+ /* applications that get this state785785+ * typically spin on it. 
yield the786786+ * processor787787+ */788788+ cond_resched();789789+ } else {790790+ attr->sq_draining = 0;791791+ }792792+793793+ pr_debug("attr->sq_draining = %d\n", attr->sq_draining);794794+795795+ return 0;796796+}797797+798798+/* called by the destroy qp verb */799799+void rxe_qp_destroy(struct rxe_qp *qp)800800+{801801+ qp->valid = 0;802802+ qp->qp_timeout_jiffies = 0;803803+ rxe_cleanup_task(&qp->resp.task);804804+805805+ del_timer_sync(&qp->retrans_timer);806806+ del_timer_sync(&qp->rnr_nak_timer);807807+808808+ rxe_cleanup_task(&qp->req.task);809809+ if (qp_type(qp) == IB_QPT_RC)810810+ rxe_cleanup_task(&qp->comp.task);811811+812812+ /* flush out any receive wr's or pending requests */813813+ __rxe_do_task(&qp->req.task);814814+ if (qp->sq.queue) {815815+ __rxe_do_task(&qp->comp.task);816816+ __rxe_do_task(&qp->req.task);817817+ }818818+}819819+820820+/* called when the last reference to the qp is dropped */821821+void rxe_qp_cleanup(void *arg)822822+{823823+ struct rxe_qp *qp = arg;824824+825825+ rxe_drop_all_mcast_groups(qp);826826+827827+ if (qp->sq.queue)828828+ rxe_queue_cleanup(qp->sq.queue);829829+830830+ if (qp->srq)831831+ rxe_drop_ref(qp->srq);832832+833833+ if (qp->rq.queue)834834+ rxe_queue_cleanup(qp->rq.queue);835835+836836+ if (qp->scq)837837+ rxe_drop_ref(qp->scq);838838+ if (qp->rcq)839839+ rxe_drop_ref(qp->rcq);840840+ if (qp->pd)841841+ rxe_drop_ref(qp->pd);842842+843843+ if (qp->resp.mr) {844844+ rxe_drop_ref(qp->resp.mr);845845+ qp->resp.mr = NULL;846846+ }847847+848848+ free_rd_atomic_resources(qp);849849+850850+ kernel_sock_shutdown(qp->sk, SHUT_RDWR);851851+}
+217
drivers/infiniband/sw/rxe/rxe_queue.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must retailuce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include <linux/vmalloc.h>3535+#include "rxe.h"3636+#include "rxe_loc.h"3737+#include "rxe_queue.h"3838+3939+int do_mmap_info(struct rxe_dev *rxe,4040+ struct ib_udata *udata,4141+ bool is_req,4242+ struct ib_ucontext *context,4343+ struct rxe_queue_buf *buf,4444+ size_t buf_size,4545+ struct rxe_mmap_info **ip_p)4646+{4747+ int err;4848+ u32 len, offset;4949+ struct rxe_mmap_info *ip = NULL;5050+5151+ if (udata) {5252+ if (is_req) {5353+ len = udata->outlen - sizeof(struct mminfo);5454+ offset = sizeof(struct mminfo);5555+ } else {5656+ len = udata->outlen;5757+ offset = 0;5858+ }5959+6060+ if (len < sizeof(ip->info))6161+ goto err1;6262+6363+ ip = rxe_create_mmap_info(rxe, buf_size, context, buf);6464+ if (!ip)6565+ goto err1;6666+6767+ err = copy_to_user(udata->outbuf + offset, &ip->info,6868+ sizeof(ip->info));6969+ if (err)7070+ goto err2;7171+7272+ spin_lock_bh(&rxe->pending_lock);7373+ list_add(&ip->pending_mmaps, &rxe->pending_mmaps);7474+ spin_unlock_bh(&rxe->pending_lock);7575+ }7676+7777+ *ip_p = ip;7878+7979+ return 0;8080+8181+err2:8282+ kfree(ip);8383+err1:8484+ return -EINVAL;8585+}8686+8787+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,8888+ int *num_elem,8989+ unsigned int elem_size)9090+{9191+ struct rxe_queue *q;9292+ size_t buf_size;9393+ unsigned int num_slots;9494+9595+ /* num_elem == 0 is allowed, but uninteresting */9696+ if (*num_elem < 0)9797+ goto err1;9898+9999+ q = kmalloc(sizeof(*q), GFP_KERNEL);100100+ if (!q)101101+ goto err1;102102+103103+ q->rxe = rxe;104104+105105+ /* used in resize, only need to copy used part of queue */106106+ q->elem_size = elem_size;107107+108108+ /* pad element up to at least a cacheline and always a 
power of 2 */109109+ if (elem_size < cache_line_size())110110+ elem_size = cache_line_size();111111+ elem_size = roundup_pow_of_two(elem_size);112112+113113+ q->log2_elem_size = order_base_2(elem_size);114114+115115+ num_slots = *num_elem + 1;116116+ num_slots = roundup_pow_of_two(num_slots);117117+ q->index_mask = num_slots - 1;118118+119119+ buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;120120+121121+ q->buf = vmalloc_user(buf_size);122122+ if (!q->buf)123123+ goto err2;124124+125125+ q->buf->log2_elem_size = q->log2_elem_size;126126+ q->buf->index_mask = q->index_mask;127127+128128+ q->buf_size = buf_size;129129+130130+ *num_elem = num_slots - 1;131131+ return q;132132+133133+err2:134134+ kfree(q);135135+err1:136136+ return NULL;137137+}138138+139139+/* copies elements from original q to new q and then swaps the contents of the140140+ * two q headers. This is so that if anyone is holding a pointer to q it will141141+ * still work142142+ */143143+static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,144144+ unsigned int num_elem)145145+{146146+ if (!queue_empty(q) && (num_elem < queue_count(q)))147147+ return -EINVAL;148148+149149+ while (!queue_empty(q)) {150150+ memcpy(producer_addr(new_q), consumer_addr(q),151151+ new_q->elem_size);152152+ advance_producer(new_q);153153+ advance_consumer(q);154154+ }155155+156156+ swap(*q, *new_q);157157+158158+ return 0;159159+}160160+161161+int rxe_queue_resize(struct rxe_queue *q,162162+ unsigned int *num_elem_p,163163+ unsigned int elem_size,164164+ struct ib_ucontext *context,165165+ struct ib_udata *udata,166166+ spinlock_t *producer_lock,167167+ spinlock_t *consumer_lock)168168+{169169+ struct rxe_queue *new_q;170170+ unsigned int num_elem = *num_elem_p;171171+ int err;172172+ unsigned long flags = 0, flags1;173173+174174+ new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);175175+ if (!new_q)176176+ return -ENOMEM;177177+178178+ err = do_mmap_info(new_q->rxe, udata, false, context, 
new_q->buf,179179+ new_q->buf_size, &new_q->ip);180180+ if (err) {181181+ vfree(new_q->buf);182182+ kfree(new_q);183183+ goto err1;184184+ }185185+186186+ spin_lock_irqsave(consumer_lock, flags1);187187+188188+ if (producer_lock) {189189+ spin_lock_irqsave(producer_lock, flags);190190+ err = resize_finish(q, new_q, num_elem);191191+ spin_unlock_irqrestore(producer_lock, flags);192192+ } else {193193+ err = resize_finish(q, new_q, num_elem);194194+ }195195+196196+ spin_unlock_irqrestore(consumer_lock, flags1);197197+198198+ rxe_queue_cleanup(new_q); /* new/old dep on err */199199+ if (err)200200+ goto err1;201201+202202+ *num_elem_p = num_elem;203203+ return 0;204204+205205+err1:206206+ return err;207207+}208208+209209+void rxe_queue_cleanup(struct rxe_queue *q)210210+{211211+ if (q->ip)212212+ kref_put(&q->ip->ref, rxe_mmap_release);213213+ else214214+ vfree(q->buf);215215+216216+ kfree(q);217217+}
+178
drivers/infiniband/sw/rxe/rxe_queue.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_QUEUE_H3535+#define RXE_QUEUE_H3636+3737+/* implements a simple circular buffer that can optionally be3838+ * shared between user space and the kernel and can be resized3939+4040+ * the requested element size is rounded up to a power of 24141+ * and the number of elements in the buffer is also rounded4242+ * up to a power of 2. 
Since the queue is empty when the4343+ * producer and consumer indices match the maximum capacity4444+ * of the queue is one less than the number of element slots4545+ */4646+4747+/* this data structure is shared between user space and kernel4848+ * space for those cases where the queue is shared. It contains4949+ * the producer and consumer indices. Is also contains a copy5050+ * of the queue size parameters for user space to use but the5151+ * kernel must use the parameters in the rxe_queue struct5252+ * this MUST MATCH the corresponding librxe struct5353+ * for performance reasons arrange to have producer and consumer5454+ * pointers in separate cache lines5555+ * the kernel should always mask the indices to avoid accessing5656+ * memory outside of the data area5757+ */5858+struct rxe_queue_buf {5959+ __u32 log2_elem_size;6060+ __u32 index_mask;6161+ __u32 pad_1[30];6262+ __u32 producer_index;6363+ __u32 pad_2[31];6464+ __u32 consumer_index;6565+ __u32 pad_3[31];6666+ __u8 data[0];6767+};6868+6969+struct rxe_queue {7070+ struct rxe_dev *rxe;7171+ struct rxe_queue_buf *buf;7272+ struct rxe_mmap_info *ip;7373+ size_t buf_size;7474+ size_t elem_size;7575+ unsigned int log2_elem_size;7676+ unsigned int index_mask;7777+};7878+7979+int do_mmap_info(struct rxe_dev *rxe,8080+ struct ib_udata *udata,8181+ bool is_req,8282+ struct ib_ucontext *context,8383+ struct rxe_queue_buf *buf,8484+ size_t buf_size,8585+ struct rxe_mmap_info **ip_p);8686+8787+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,8888+ int *num_elem,8989+ unsigned int elem_size);9090+9191+int rxe_queue_resize(struct rxe_queue *q,9292+ unsigned int *num_elem_p,9393+ unsigned int elem_size,9494+ struct ib_ucontext *context,9595+ struct ib_udata *udata,9696+ /* Protect producers while resizing queue */9797+ spinlock_t *producer_lock,9898+ /* Protect consumers while resizing queue */9999+ spinlock_t *consumer_lock);100100+101101+void rxe_queue_cleanup(struct rxe_queue *queue);102102+103103+static inline 
int next_index(struct rxe_queue *q, int index)104104+{105105+ return (index + 1) & q->buf->index_mask;106106+}107107+108108+static inline int queue_empty(struct rxe_queue *q)109109+{110110+ return ((q->buf->producer_index - q->buf->consumer_index)111111+ & q->index_mask) == 0;112112+}113113+114114+static inline int queue_full(struct rxe_queue *q)115115+{116116+ return ((q->buf->producer_index + 1 - q->buf->consumer_index)117117+ & q->index_mask) == 0;118118+}119119+120120+static inline void advance_producer(struct rxe_queue *q)121121+{122122+ q->buf->producer_index = (q->buf->producer_index + 1)123123+ & q->index_mask;124124+}125125+126126+static inline void advance_consumer(struct rxe_queue *q)127127+{128128+ q->buf->consumer_index = (q->buf->consumer_index + 1)129129+ & q->index_mask;130130+}131131+132132+static inline void *producer_addr(struct rxe_queue *q)133133+{134134+ return q->buf->data + ((q->buf->producer_index & q->index_mask)135135+ << q->log2_elem_size);136136+}137137+138138+static inline void *consumer_addr(struct rxe_queue *q)139139+{140140+ return q->buf->data + ((q->buf->consumer_index & q->index_mask)141141+ << q->log2_elem_size);142142+}143143+144144+static inline unsigned int producer_index(struct rxe_queue *q)145145+{146146+ return q->buf->producer_index;147147+}148148+149149+static inline unsigned int consumer_index(struct rxe_queue *q)150150+{151151+ return q->buf->consumer_index;152152+}153153+154154+static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)155155+{156156+ return q->buf->data + ((index & q->index_mask)157157+ << q->buf->log2_elem_size);158158+}159159+160160+static inline unsigned int index_from_addr(const struct rxe_queue *q,161161+ const void *addr)162162+{163163+ return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)164164+ & q->index_mask;165165+}166166+167167+static inline unsigned int queue_count(const struct rxe_queue *q)168168+{169169+ return (q->buf->producer_index - 
q->buf->consumer_index)170170+ & q->index_mask;171171+}172172+173173+static inline void *queue_head(struct rxe_queue *q)174174+{175175+ return queue_empty(q) ? NULL : consumer_addr(q);176176+}177177+178178+#endif /* RXE_QUEUE_H */
+420
drivers/infiniband/sw/rxe/rxe_recv.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include <linux/skbuff.h>3535+3636+#include "rxe.h"3737+#include "rxe_loc.h"3838+3939+static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,4040+ struct rxe_qp *qp)4141+{4242+ if (unlikely(!qp->valid))4343+ goto err1;4444+4545+ switch (qp_type(qp)) {4646+ case IB_QPT_RC:4747+ if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {4848+ pr_warn_ratelimited("bad qp type\n");4949+ goto err1;5050+ }5151+ break;5252+ case IB_QPT_UC:5353+ if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {5454+ pr_warn_ratelimited("bad qp type\n");5555+ goto err1;5656+ }5757+ break;5858+ case IB_QPT_UD:5959+ case IB_QPT_SMI:6060+ case IB_QPT_GSI:6161+ if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {6262+ pr_warn_ratelimited("bad qp type\n");6363+ goto err1;6464+ }6565+ break;6666+ default:6767+ pr_warn_ratelimited("unsupported qp type\n");6868+ goto err1;6969+ }7070+7171+ if (pkt->mask & RXE_REQ_MASK) {7272+ if (unlikely(qp->resp.state != QP_STATE_READY))7373+ goto err1;7474+ } else if (unlikely(qp->req.state < QP_STATE_READY ||7575+ qp->req.state > QP_STATE_DRAINED)) {7676+ goto err1;7777+ }7878+7979+ return 0;8080+8181+err1:8282+ return -EINVAL;8383+}8484+8585+static void set_bad_pkey_cntr(struct rxe_port *port)8686+{8787+ spin_lock_bh(&port->port_lock);8888+ port->attr.bad_pkey_cntr = min((u32)0xffff,8989+ port->attr.bad_pkey_cntr + 1);9090+ spin_unlock_bh(&port->port_lock);9191+}9292+9393+static void set_qkey_viol_cntr(struct rxe_port *port)9494+{9595+ spin_lock_bh(&port->port_lock);9696+ port->attr.qkey_viol_cntr = min((u32)0xffff,9797+ port->attr.qkey_viol_cntr + 1);9898+ spin_unlock_bh(&port->port_lock);9999+}100100+101101+static int check_keys(struct rxe_dev *rxe, struct 
rxe_pkt_info *pkt,102102+ u32 qpn, struct rxe_qp *qp)103103+{104104+ int i;105105+ int found_pkey = 0;106106+ struct rxe_port *port = &rxe->port;107107+ u16 pkey = bth_pkey(pkt);108108+109109+ pkt->pkey_index = 0;110110+111111+ if (qpn == 1) {112112+ for (i = 0; i < port->attr.pkey_tbl_len; i++) {113113+ if (pkey_match(pkey, port->pkey_tbl[i])) {114114+ pkt->pkey_index = i;115115+ found_pkey = 1;116116+ break;117117+ }118118+ }119119+120120+ if (!found_pkey) {121121+ pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);122122+ set_bad_pkey_cntr(port);123123+ goto err1;124124+ }125125+ } else if (qpn != 0) {126126+ if (unlikely(!pkey_match(pkey,127127+ port->pkey_tbl[qp->attr.pkey_index]128128+ ))) {129129+ pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey);130130+ set_bad_pkey_cntr(port);131131+ goto err1;132132+ }133133+ pkt->pkey_index = qp->attr.pkey_index;134134+ }135135+136136+ if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&137137+ qpn != 0 && pkt->mask) {138138+ u32 qkey = (qpn == 1) ? 
GSI_QKEY : qp->attr.qkey;139139+140140+ if (unlikely(deth_qkey(pkt) != qkey)) {141141+ pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n",142142+ deth_qkey(pkt), qkey, qpn);143143+ set_qkey_viol_cntr(port);144144+ goto err1;145145+ }146146+ }147147+148148+ return 0;149149+150150+err1:151151+ return -EINVAL;152152+}153153+154154+static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,155155+ struct rxe_qp *qp)156156+{157157+ struct sk_buff *skb = PKT_TO_SKB(pkt);158158+159159+ if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC)160160+ goto done;161161+162162+ if (unlikely(pkt->port_num != qp->attr.port_num)) {163163+ pr_warn_ratelimited("port %d != qp port %d\n",164164+ pkt->port_num, qp->attr.port_num);165165+ goto err1;166166+ }167167+168168+ if (skb->protocol == htons(ETH_P_IP)) {169169+ struct in_addr *saddr =170170+ &qp->pri_av.sgid_addr._sockaddr_in.sin_addr;171171+ struct in_addr *daddr =172172+ &qp->pri_av.dgid_addr._sockaddr_in.sin_addr;173173+174174+ if (ip_hdr(skb)->daddr != saddr->s_addr) {175175+ pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n",176176+ &ip_hdr(skb)->daddr,177177+ &saddr->s_addr);178178+ goto err1;179179+ }180180+181181+ if (ip_hdr(skb)->saddr != daddr->s_addr) {182182+ pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n",183183+ &ip_hdr(skb)->saddr,184184+ &daddr->s_addr);185185+ goto err1;186186+ }187187+188188+ } else if (skb->protocol == htons(ETH_P_IPV6)) {189189+ struct in6_addr *saddr =190190+ &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr;191191+ struct in6_addr *daddr =192192+ &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr;193193+194194+ if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) {195195+ pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n",196196+ &ipv6_hdr(skb)->daddr, saddr);197197+ goto err1;198198+ }199199+200200+ if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) {201201+ pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n",202202+ 
&ipv6_hdr(skb)->saddr, daddr);203203+ goto err1;204204+ }205205+ }206206+207207+done:208208+ return 0;209209+210210+err1:211211+ return -EINVAL;212212+}213213+214214+static int hdr_check(struct rxe_pkt_info *pkt)215215+{216216+ struct rxe_dev *rxe = pkt->rxe;217217+ struct rxe_port *port = &rxe->port;218218+ struct rxe_qp *qp = NULL;219219+ u32 qpn = bth_qpn(pkt);220220+ int index;221221+ int err;222222+223223+ if (unlikely(bth_tver(pkt) != BTH_TVER)) {224224+ pr_warn_ratelimited("bad tver\n");225225+ goto err1;226226+ }227227+228228+ if (qpn != IB_MULTICAST_QPN) {229229+ index = (qpn == 0) ? port->qp_smi_index :230230+ ((qpn == 1) ? port->qp_gsi_index : qpn);231231+ qp = rxe_pool_get_index(&rxe->qp_pool, index);232232+ if (unlikely(!qp)) {233233+ pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);234234+ goto err1;235235+ }236236+237237+ err = check_type_state(rxe, pkt, qp);238238+ if (unlikely(err))239239+ goto err2;240240+241241+ err = check_addr(rxe, pkt, qp);242242+ if (unlikely(err))243243+ goto err2;244244+245245+ err = check_keys(rxe, pkt, qpn, qp);246246+ if (unlikely(err))247247+ goto err2;248248+ } else {249249+ if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) {250250+ pr_warn_ratelimited("no grh for mcast qpn\n");251251+ goto err1;252252+ }253253+ }254254+255255+ pkt->qp = qp;256256+ return 0;257257+258258+err2:259259+ if (qp)260260+ rxe_drop_ref(qp);261261+err1:262262+ return -EINVAL;263263+}264264+265265+static inline void rxe_rcv_pkt(struct rxe_dev *rxe,266266+ struct rxe_pkt_info *pkt,267267+ struct sk_buff *skb)268268+{269269+ if (pkt->mask & RXE_REQ_MASK)270270+ rxe_resp_queue_pkt(rxe, pkt->qp, skb);271271+ else272272+ rxe_comp_queue_pkt(rxe, pkt->qp, skb);273273+}274274+275275+static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)276276+{277277+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);278278+ struct rxe_mc_grp *mcg;279279+ struct sk_buff *skb_copy;280280+ struct rxe_mc_elem *mce;281281+ struct rxe_qp *qp;282282+ union ib_gid 
dgid;283283+ int err;284284+285285+ if (skb->protocol == htons(ETH_P_IP))286286+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,287287+ (struct in6_addr *)&dgid);288288+ else if (skb->protocol == htons(ETH_P_IPV6))289289+ memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));290290+291291+ /* lookup mcast group corresponding to mgid, takes a ref */292292+ mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);293293+ if (!mcg)294294+ goto err1; /* mcast group not registered */295295+296296+ spin_lock_bh(&mcg->mcg_lock);297297+298298+ list_for_each_entry(mce, &mcg->qp_list, qp_list) {299299+ qp = mce->qp;300300+ pkt = SKB_TO_PKT(skb);301301+302302+ /* validate qp for incoming packet */303303+ err = check_type_state(rxe, pkt, qp);304304+ if (err)305305+ continue;306306+307307+ err = check_keys(rxe, pkt, bth_qpn(pkt), qp);308308+ if (err)309309+ continue;310310+311311+ /* if *not* the last qp in the list312312+ * make a copy of the skb to post to the next qp313313+ */314314+ skb_copy = (mce->qp_list.next != &mcg->qp_list) ?315315+ skb_clone(skb, GFP_KERNEL) : NULL;316316+317317+ pkt->qp = qp;318318+ rxe_add_ref(qp);319319+ rxe_rcv_pkt(rxe, pkt, skb);320320+321321+ skb = skb_copy;322322+ if (!skb)323323+ break;324324+ }325325+326326+ spin_unlock_bh(&mcg->mcg_lock);327327+328328+ rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. 
*/329329+330330+err1:331331+ if (skb)332332+ kfree_skb(skb);333333+}334334+335335+static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)336336+{337337+ union ib_gid dgid;338338+ union ib_gid *pdgid;339339+ u16 index;340340+341341+ if (skb->protocol == htons(ETH_P_IP)) {342342+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,343343+ (struct in6_addr *)&dgid);344344+ pdgid = &dgid;345345+ } else {346346+ pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;347347+ }348348+349349+ return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,350350+ IB_GID_TYPE_ROCE_UDP_ENCAP,351351+ 1, rxe->ndev, &index);352352+}353353+354354+/* rxe_rcv is called from the interface driver */355355+int rxe_rcv(struct sk_buff *skb)356356+{357357+ int err;358358+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);359359+ struct rxe_dev *rxe = pkt->rxe;360360+ __be32 *icrcp;361361+ u32 calc_icrc, pack_icrc;362362+363363+ pkt->offset = 0;364364+365365+ if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))366366+ goto drop;367367+368368+ if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {369369+ pr_warn_ratelimited("failed matching dgid\n");370370+ goto drop;371371+ }372372+373373+ pkt->opcode = bth_opcode(pkt);374374+ pkt->psn = bth_psn(pkt);375375+ pkt->qp = NULL;376376+ pkt->mask |= rxe_opcode[pkt->opcode].mask;377377+378378+ if (unlikely(skb->len < header_size(pkt)))379379+ goto drop;380380+381381+ err = hdr_check(pkt);382382+ if (unlikely(err))383383+ goto drop;384384+385385+ /* Verify ICRC */386386+ icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);387387+ pack_icrc = be32_to_cpu(*icrcp);388388+389389+ calc_icrc = rxe_icrc_hdr(pkt, skb);390390+ calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt));391391+ calc_icrc = cpu_to_be32(~calc_icrc);392392+ if (unlikely(calc_icrc != pack_icrc)) {393393+ char saddr[sizeof(struct in6_addr)];394394+395395+ if (skb->protocol == htons(ETH_P_IPV6))396396+ sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);397397+ else if (skb->protocol == 
htons(ETH_P_IP))398398+ sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);399399+ else400400+ sprintf(saddr, "unknown");401401+402402+ pr_warn_ratelimited("bad ICRC from %s\n", saddr);403403+ goto drop;404404+ }405405+406406+ if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))407407+ rxe_rcv_mcast_pkt(rxe, skb);408408+ else409409+ rxe_rcv_pkt(rxe, pkt, skb);410410+411411+ return 0;412412+413413+drop:414414+ if (pkt->qp)415415+ rxe_drop_ref(pkt->qp);416416+417417+ kfree_skb(skb);418418+ return 0;419419+}420420+EXPORT_SYMBOL(rxe_rcv);
+726
drivers/infiniband/sw/rxe/rxe_req.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode);

/* Re-walk the first, partially acknowledged send/write WQE of a retry:
 * advance its DMA state past the npsn packets the peer already acked so
 * that retransmission resumes at the correct payload offset.
 */
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned mask, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		/* each already-acked packet carried up to one MTU */
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
		if (mask & WR_WRITE_MASK)
			wqe->iova += qp->mtu;
	}
}

/* Rewind the requester state to the completer's PSN (qp->comp.psn) and
 * mark every WQE that is not yet fully completed as posted again so it
 * will be retransmitted.
 */
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;
	int first = 1;

	wqe = queue_head(qp->sq.queue);
	/* number of PSNs of the head WQE already acknowledged */
	npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;

	qp->req.wqe_index = consumer_index(qp->sq.queue);
	qp->req.psn = qp->comp.psn;
	qp->req.opcode = -1;

	for (wqe_index = consumer_index(qp->sq.queue);
	     wqe_index != producer_index(qp->sq.queue);
	     wqe_index = next_index(qp->sq.queue, wqe_index)) {
		wqe = addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		/* restart iova at the remote address for RDMA/atomic ops */
		wqe->iova = (mask & WR_ATOMIC_MASK) ?
			     wqe->wr.wr.atomic.remote_addr :
			     (mask & WR_READ_OR_WRITE_MASK) ?
			     wqe->wr.wr.rdma.remote_addr :
			     0;

		if (!first || (mask & WR_READ_MASK) == 0) {
			/* reset DMA state to resend the whole WQE */
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			/* skip the part of the first WQE already acked */
			if (mask & WR_WRITE_OR_SEND_MASK)
				retry_first_write_send(qp, wqe, mask, npsn);

			if (mask & WR_READ_MASK)
				wqe->iova += npsn * qp->mtu;
		}

		wqe->state = wqe_state_posted;
	}
}

/* RNR NAK retry timer callback: re-run the requester task. */
void rnr_nak_timer(unsigned long data)
{
	struct rxe_qp *qp = (struct rxe_qp *)data;

	pr_debug("rnr nak timer fired\n");
	rxe_run_task(&qp->req.task, 1);
}

/* Return the next send WQE to process, or NULL when there is nothing
 * to do: empty queue, QP draining/drained, or a fenced WQE that must
 * wait for earlier WQEs to complete.
 */
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
	unsigned long flags;

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		spin_lock_irqsave(&qp->state_lock, flags);
		do {
			if (qp->req.state != QP_STATE_DRAIN) {
				/* comp just finished */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			if (wqe && ((qp->req.wqe_index !=
				consumer_index(qp->sq.queue)) ||
				(wqe->state != wqe_state_posted))) {
				/* comp not done yet */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			/* report SQ drained to the consumer, outside the
			 * state lock
			 */
			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} while (0);
	}

	if (qp->req.wqe_index == producer_index(qp->sq.queue))
		return
NULL;

	wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);

	/* while draining, only WQEs already in flight may proceed */
	if (unlikely((qp->req.state == QP_STATE_DRAIN ||
		      qp->req.state == QP_STATE_DRAINED) &&
		     (wqe->state != wqe_state_processing)))
		return NULL;

	/* a fenced WQE waits until every earlier WQE has completed */
	if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
		     (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
		qp->req.wait_fence = 1;
		return NULL;
	}

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

/* Map an RC work-request opcode to the next wire opcode, continuing a
 * multi-packet message when the previous packet (qp->req.opcode) was a
 * FIRST/MIDDLE fragment.  fits != 0 means the remaining payload fits
 * in a single MTU.  Returns -EINVAL for an unsupported opcode.
 */
static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		/* local-only operations: no wire opcode, handled by the
		 * requester before any packet is built
		 */
		return opcode;
	}

	return -EINVAL;
}

/* Same mapping as next_opcode_rc() for UC QPs; UC supports only sends
 * and RDMA writes.
 */
static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

/* Dispatch to the per-QP-type opcode mapper.  UD/SMI/GSI only ever
 * emit single-packet sends.  Returns -EINVAL for unsupported
 * QP-type/opcode combinations.
 */
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_SMI:
	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

/* Reserve one read/atomic responder-resource slot for this WQE.
 * Returns 0 when a slot is held, -EAGAIN when the initiator depth is
 * exhausted (the decrement is undone and need_rd_atomic left set so
 * the completer can wake us).
 */
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

/* MTU for this WQE: the QP's negotiated MTU for connected QPs,
 * otherwise the port's MTU capability.
 */
static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_port *port;
	struct rxe_av *av;

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	av = &wqe->av;
	port = &rxe->port;

	return
port->mtu_cap;
}

/* Allocate an skb via the interface layer and build every request
 * header (BTH plus whichever optional headers the opcode mask calls
 * for) for one outgoing packet of this WQE.  Returns NULL when skb
 * allocation fails.
 */
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_send_wqe *wqe,
				       int opcode, int payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_port *port = &rxe->port;
	struct sk_buff *skb;
	struct rxe_send_wr *ibwr = &wqe->wr;
	struct rxe_av *av;
	int pad = (-payload) & 0x3;	/* pad payload to a 4-byte multiple */
	int paylen;
	int solicited;
	u16 pkey;
	u32 qp_num;
	int ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	/* pkt->hdr, rxe, port_num and mask are initialized in ifc
	 * layer
	 */
	pkt->opcode = opcode;
	pkt->qp = qp;
	pkt->psn = qp->req.psn;
	pkt->mask = rxe_opcode[opcode].mask;
	pkt->paylen = paylen;
	pkt->offset = 0;
	pkt->wqe = wqe;

	/* init skb */
	av = rxe_get_av(pkt);
	skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth: SE bit is only set on the last packet of a send,
	 * or of a write with immediate
	 */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	pkey = (qp_type(qp) == IB_QPT_GSI) ?
		 port->pkey_tbl[ibwr->wr.ud.pkey_index] :
		 port->pkey_tbl[qp->attr.pkey_index];

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	/* request an ack on the last packet or every
	 * RXE_MAX_PKT_PER_ACK packets
	 */
	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
		 ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.length);
	}

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
		    opcode == IB_OPCODE_RD_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

/* Copy the payload into the packet (from inline data or via
 * copy_data()), accumulating the ICRC over it, and store the inverted
 * CRC after the payload and pad.  Returns 0 or a negative errno from
 * the interface prepare / copy_data steps.
 */
static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
		       int paylen)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u32 crc = 0;
	u32 *p;
	int err;

	err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			crc = crc32_le(crc, tmp, paylen);

			memcpy(payload_addr(pkt), tmp, paylen);

			wqe->dma.resid -= paylen;
			wqe->dma.sge_offset += paylen;
		} else {
			err = copy_data(rxe, qp->pd, 0, &wqe->dma,
					payload_addr(pkt), paylen,
					from_mem_obj,
					&crc);
			if (err)
				return err;
		}
	}
	p = payload_addr(pkt) + paylen + bth_pad(pkt);

	/* ICRC is transmitted as the complement of the running CRC */
	*p = ~crc;

	return 0;
}

/* Advance the WQE state for the packet about to be sent, remembering
 * the previous state in *prev_state so a failed transmit can be rolled
 * back.  Only RC keeps the WQE pending for the completer after the
 * last packet.
 */
static void update_wqe_state(struct rxe_qp *qp,
			     struct rxe_send_wqe *wqe,
			     struct rxe_pkt_info *pkt,
			     enum wqe_state *prev_state)
{
	enum wqe_state prev_state_ = wqe->state;

	if (pkt->mask & RXE_END_MASK) {
		if (qp_type(qp) == IB_QPT_RC)
			wqe->state = wqe_state_pending;
	} else {
		wqe->state = wqe_state_processing;
	}

	*prev_state = prev_state_;
}

/* Advance requester PSN/opcode/index bookkeeping after a packet has
 * been handed to the transmit path, and (re)arm the retransmit timer.
 */
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
			 struct rxe_pkt_info *pkt, int payload)
{
	/* number of packets left to send including current one */
	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

	/* handle zero length packet case */
	if (num_pkt == 0)
		num_pkt = 1;

	if (pkt->mask & RXE_START_MASK) {
		wqe->first_psn = qp->req.psn;
		wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
	}

	/* a read request consumes one PSN per expected response packet */
	if (pkt->mask & RXE_READ_MASK)
		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
	else
		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;

	qp->req.opcode = pkt->opcode;


	if (pkt->mask & RXE_END_MASK)
		qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);

	qp->need_req_skb = 0;

	if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
		mod_timer(&qp->retrans_timer,
			  jiffies + qp->qp_timeout_jiffies);

}

int
rxe_requester(void *arg)564564+{565565+ struct rxe_qp *qp = (struct rxe_qp *)arg;566566+ struct rxe_pkt_info pkt;567567+ struct sk_buff *skb;568568+ struct rxe_send_wqe *wqe;569569+ unsigned mask;570570+ int payload;571571+ int mtu;572572+ int opcode;573573+ int ret;574574+ enum wqe_state prev_state;575575+576576+next_wqe:577577+ if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))578578+ goto exit;579579+580580+ if (unlikely(qp->req.state == QP_STATE_RESET)) {581581+ qp->req.wqe_index = consumer_index(qp->sq.queue);582582+ qp->req.opcode = -1;583583+ qp->req.need_rd_atomic = 0;584584+ qp->req.wait_psn = 0;585585+ qp->req.need_retry = 0;586586+ goto exit;587587+ }588588+589589+ if (unlikely(qp->req.need_retry)) {590590+ req_retry(qp);591591+ qp->req.need_retry = 0;592592+ }593593+594594+ wqe = req_next_wqe(qp);595595+ if (unlikely(!wqe))596596+ goto exit;597597+598598+ if (wqe->mask & WR_REG_MASK) {599599+ if (wqe->wr.opcode == IB_WR_LOCAL_INV) {600600+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);601601+ struct rxe_mem *rmr;602602+603603+ rmr = rxe_pool_get_index(&rxe->mr_pool,604604+ wqe->wr.ex.invalidate_rkey >> 8);605605+ if (!rmr) {606606+ pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey);607607+ wqe->state = wqe_state_error;608608+ wqe->status = IB_WC_MW_BIND_ERR;609609+ goto exit;610610+ }611611+ rmr->state = RXE_MEM_STATE_FREE;612612+ wqe->state = wqe_state_done;613613+ wqe->status = IB_WC_SUCCESS;614614+ } else if (wqe->wr.opcode == IB_WR_REG_MR) {615615+ struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);616616+617617+ rmr->state = RXE_MEM_STATE_VALID;618618+ rmr->access = wqe->wr.wr.reg.access;619619+ rmr->lkey = wqe->wr.wr.reg.key;620620+ rmr->rkey = wqe->wr.wr.reg.key;621621+ wqe->state = wqe_state_done;622622+ wqe->status = IB_WC_SUCCESS;623623+ } else {624624+ goto exit;625625+ }626626+ qp->req.wqe_index = next_index(qp->sq.queue,627627+ qp->req.wqe_index);628628+ goto next_wqe;629629+ }630630+631631+ if (unlikely(qp_type(qp) == 
IB_QPT_RC &&632632+ qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {633633+ qp->req.wait_psn = 1;634634+ goto exit;635635+ }636636+637637+ /* Limit the number of inflight SKBs per QP */638638+ if (unlikely(atomic_read(&qp->skb_out) >639639+ RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {640640+ qp->need_req_skb = 1;641641+ goto exit;642642+ }643643+644644+ opcode = next_opcode(qp, wqe, wqe->wr.opcode);645645+ if (unlikely(opcode < 0)) {646646+ wqe->status = IB_WC_LOC_QP_OP_ERR;647647+ goto exit;648648+ }649649+650650+ mask = rxe_opcode[opcode].mask;651651+ if (unlikely(mask & RXE_READ_OR_ATOMIC)) {652652+ if (check_init_depth(qp, wqe))653653+ goto exit;654654+ }655655+656656+ mtu = get_mtu(qp, wqe);657657+ payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;658658+ if (payload > mtu) {659659+ if (qp_type(qp) == IB_QPT_UD) {660660+ /* C10-93.1.1: If the total sum of all the buffer lengths specified for a661661+ * UD message exceeds the MTU of the port as returned by QueryHCA, the CI662662+ * shall not emit any packets for this message. 
Further, the CI shall not663663+ * generate an error due to this condition.664664+ */665665+666666+ /* fake a successful UD send */667667+ wqe->first_psn = qp->req.psn;668668+ wqe->last_psn = qp->req.psn;669669+ qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;670670+ qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;671671+ qp->req.wqe_index = next_index(qp->sq.queue,672672+ qp->req.wqe_index);673673+ wqe->state = wqe_state_done;674674+ wqe->status = IB_WC_SUCCESS;675675+ goto complete;676676+ }677677+ payload = mtu;678678+ }679679+680680+ skb = init_req_packet(qp, wqe, opcode, payload, &pkt);681681+ if (unlikely(!skb)) {682682+ pr_err("Failed allocating skb\n");683683+ goto err;684684+ }685685+686686+ if (fill_packet(qp, wqe, &pkt, skb, payload)) {687687+ pr_debug("Error during fill packet\n");688688+ goto err;689689+ }690690+691691+ update_wqe_state(qp, wqe, &pkt, &prev_state);692692+ ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);693693+ if (ret) {694694+ qp->need_req_skb = 1;695695+ kfree_skb(skb);696696+697697+ wqe->state = prev_state;698698+699699+ if (ret == -EAGAIN) {700700+ rxe_run_task(&qp->req.task, 1);701701+ goto exit;702702+ }703703+704704+ goto err;705705+ }706706+707707+ update_state(qp, wqe, &pkt, payload);708708+709709+ goto next_wqe;710710+711711+err:712712+ kfree_skb(skb);713713+ wqe->status = IB_WC_LOC_PROT_ERR;714714+ wqe->state = wqe_state_error;715715+716716+complete:717717+ if (qp_type(qp) != IB_QPT_RC) {718718+ while (rxe_completer(qp) == 0)719719+ ;720720+ }721721+722722+ return 0;723723+724724+exit:725725+ return -EAGAIN;726726+}
+1380
drivers/infiniband/sw/rxe/rxe_resp.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

/* States of the responder state machine; each handler returns the
 * next state to enter.
 */
enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_RESET,
	RESPST_DONE,
	RESPST_EXIT,
};

/* Human-readable names of the states above, indexed by enum value
 * (presumably for debug tracing; no user in this hunk — confirm).
 */
static char *resp_state_name[] = {
	[RESPST_NONE]				= "NONE",
	[RESPST_GET_REQ]			= "GET_REQ",
	[RESPST_CHK_PSN]			= "CHK_PSN",
	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
	[RESPST_CHK_RKEY]			= "CHK_RKEY",
	[RESPST_EXECUTE]			= "EXECUTE",
	[RESPST_READ_REPLY]			= "READ_REPLY",
	[RESPST_COMPLETE]			= "COMPLETE",
	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
	[RESPST_CLEANUP]			= "CLEANUP",
	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR]			= "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
	[RESPST_ERROR]				= "ERROR",
	[RESPST_RESET]				= "RESET",
	[RESPST_DONE]				= "DONE",
	[RESPST_EXIT]				= "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
			struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	/* schedule (rather than run inline) for read requests or when
	 * packets are already backed up
	 */
	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	rxe_run_task(&qp->resp.task, must_sched);
}

/* Peek the next request packet; in the error state, drain the packet
 * queue (dropping one QP reference per packet) before flushing the
 * recv WR queue.
 */
static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	if (qp->resp.state == QP_STATE_ERROR) {
		skb = skb_dequeue(&qp->req_pkts);
		if (skb) {
			/* drain request packet queue */
			rxe_drop_ref(qp);
			kfree_skb(skb);
			return RESPST_GET_REQ;
		}

		/* go drain recv wr queue */
		return RESPST_CHK_RESOURCE;
	}

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	/* a pending responder resource means a read reply is in progress */
	return (qp->resp.res) ?
RESPST_READ_REPLY : RESPST_CHK_PSN;
}

/* Validate the PSN of an incoming request against the expected
 * responder PSN.  RC NAKs the first out-of-sequence packet and treats
 * earlier PSNs as duplicates; UC drops the rest of a message after a
 * gap until the next START packet.
 */
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			/* only send one NAK per out-of-sequence event */
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

/* Check that this packet's opcode is a legal successor of the previous
 * one (FIRST -> MIDDLE -> LAST ordering for multi-packet messages).
 * RC violations become NAK-able errors; UC violations silently drop
 * the message.
 */
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			/* previous packet ended a message: a MIDDLE/LAST
			 * here is missing its FIRST
			 */
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}

/* Verify that the QP's access flags permit this operation (remote
 * read/write/atomic).  RC rejects with an error, UC silently drops.
 */
static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (((pkt->mask & RXE_READ_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
		    ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
		    ((pkt->mask & RXE_ATOMIC_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
			return RESPST_ERR_UNSUPPORTED_OPCODE;
		}

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

/* Take the next recv WQE from the shared receive queue, copying it
 * into qp->resp.srq_wqe under the consumer lock.  Fires the SRQ limit
 * event (outside the lock) when the queue drops below the armed limit.
 */
static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_bh(&srq->rq.consumer_lock);

	wqe = queue_head(q);
	if (!wqe) {
		spin_unlock_bh(&srq->rq.consumer_lock);
		return RESPST_ERR_RNR;
	}

	/* note kernel and user space recv wqes have same size */
	memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	advance_consumer(q);

	if (srq->limit && srq->ibsrq.event_handler &&
	    (queue_count(q) < srq->limit)) {
		/* disarm the limit before reporting it */
		srq->limit = 0;
		goto event;
	}

	spin_unlock_bh(&srq->rq.consumer_lock);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_bh(&srq->rq.consumer_lock);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

/* Make sure a resource exists to execute this request: a recv WQE for
 * sends (flushing them with WR_FLUSH_ERR when the QP is in error), or
 * a responder-resource slot for reads/atomics.
 */
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (qp->resp.state == QP_STATE_ERROR) {
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_WR_FLUSH_ERR;
			return RESPST_COMPLETE;
		} else if (!srq) {
			qp->resp.wqe = queue_head(qp->rq.queue);
			if (qp->resp.wqe) {
				qp->resp.status = IB_WC_WR_FLUSH_ERR;
				return RESPST_COMPLETE;
			} else {
				return RESPST_EXIT;
			}
		} else {
			return RESPST_EXIT;
		}
	}

	if (pkt->mask & RXE_READ_OR_ATOMIC) {
		/* it is the requesters job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue);
		return (qp->resp.wqe) ?
RESPST_CHK_LENGTH : RESPST_ERR_RNR;398398+ }399399+400400+ return RESPST_CHK_LENGTH;401401+}402402+403403+static enum resp_states check_length(struct rxe_qp *qp,404404+ struct rxe_pkt_info *pkt)405405+{406406+ switch (qp_type(qp)) {407407+ case IB_QPT_RC:408408+ return RESPST_CHK_RKEY;409409+410410+ case IB_QPT_UC:411411+ return RESPST_CHK_RKEY;412412+413413+ default:414414+ return RESPST_CHK_RKEY;415415+ }416416+}417417+418418+static enum resp_states check_rkey(struct rxe_qp *qp,419419+ struct rxe_pkt_info *pkt)420420+{421421+ struct rxe_mem *mem;422422+ u64 va;423423+ u32 rkey;424424+ u32 resid;425425+ u32 pktlen;426426+ int mtu = qp->mtu;427427+ enum resp_states state;428428+ int access;429429+430430+ if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {431431+ if (pkt->mask & RXE_RETH_MASK) {432432+ qp->resp.va = reth_va(pkt);433433+ qp->resp.rkey = reth_rkey(pkt);434434+ qp->resp.resid = reth_len(pkt);435435+ }436436+ access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ437437+ : IB_ACCESS_REMOTE_WRITE;438438+ } else if (pkt->mask & RXE_ATOMIC_MASK) {439439+ qp->resp.va = atmeth_va(pkt);440440+ qp->resp.rkey = atmeth_rkey(pkt);441441+ qp->resp.resid = sizeof(u64);442442+ access = IB_ACCESS_REMOTE_ATOMIC;443443+ } else {444444+ return RESPST_EXECUTE;445445+ }446446+447447+ va = qp->resp.va;448448+ rkey = qp->resp.rkey;449449+ resid = qp->resp.resid;450450+ pktlen = payload_size(pkt);451451+452452+ mem = lookup_mem(qp->pd, access, rkey, lookup_remote);453453+ if (!mem) {454454+ state = RESPST_ERR_RKEY_VIOLATION;455455+ goto err1;456456+ }457457+458458+ if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {459459+ state = RESPST_ERR_RKEY_VIOLATION;460460+ goto err1;461461+ }462462+463463+ if (mem_check_range(mem, va, resid)) {464464+ state = RESPST_ERR_RKEY_VIOLATION;465465+ goto err2;466466+ }467467+468468+ if (pkt->mask & RXE_WRITE_MASK) {469469+ if (resid > mtu) {470470+ if (pktlen != mtu || bth_pad(pkt)) {471471+ state = RESPST_ERR_LENGTH;472472+ goto 
err2;473473+ }474474+475475+ resid = mtu;476476+ } else {477477+ if (pktlen != resid) {478478+ state = RESPST_ERR_LENGTH;479479+ goto err2;480480+ }481481+ if ((bth_pad(pkt) != (0x3 & (-resid)))) {482482+ /* This case may not be exactly that483483+ * but nothing else fits.484484+ */485485+ state = RESPST_ERR_LENGTH;486486+ goto err2;487487+ }488488+ }489489+ }490490+491491+ WARN_ON(qp->resp.mr);492492+493493+ qp->resp.mr = mem;494494+ return RESPST_EXECUTE;495495+496496+err2:497497+ rxe_drop_ref(mem);498498+err1:499499+ return state;500500+}501501+502502+static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,503503+ int data_len)504504+{505505+ int err;506506+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);507507+508508+ err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,509509+ data_addr, data_len, to_mem_obj, NULL);510510+ if (unlikely(err))511511+ return (err == -ENOSPC) ? RESPST_ERR_LENGTH512512+ : RESPST_ERR_MALFORMED_WQE;513513+514514+ return RESPST_NONE;515515+}516516+517517+static enum resp_states write_data_in(struct rxe_qp *qp,518518+ struct rxe_pkt_info *pkt)519519+{520520+ enum resp_states rc = RESPST_NONE;521521+ int err;522522+ int data_len = payload_size(pkt);523523+524524+ err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),525525+ data_len, to_mem_obj, NULL);526526+ if (err) {527527+ rc = RESPST_ERR_RKEY_VIOLATION;528528+ goto out;529529+ }530530+531531+ qp->resp.va += data_len;532532+ qp->resp.resid -= data_len;533533+534534+out:535535+ return rc;536536+}537537+538538+/* Guarantee atomicity of atomic operations at the machine level. 
*/539539+static DEFINE_SPINLOCK(atomic_ops_lock);540540+541541+static enum resp_states process_atomic(struct rxe_qp *qp,542542+ struct rxe_pkt_info *pkt)543543+{544544+ u64 iova = atmeth_va(pkt);545545+ u64 *vaddr;546546+ enum resp_states ret;547547+ struct rxe_mem *mr = qp->resp.mr;548548+549549+ if (mr->state != RXE_MEM_STATE_VALID) {550550+ ret = RESPST_ERR_RKEY_VIOLATION;551551+ goto out;552552+ }553553+554554+ vaddr = iova_to_vaddr(mr, iova, sizeof(u64));555555+556556+ /* check vaddr is 8 bytes aligned. */557557+ if (!vaddr || (uintptr_t)vaddr & 7) {558558+ ret = RESPST_ERR_MISALIGNED_ATOMIC;559559+ goto out;560560+ }561561+562562+ spin_lock_bh(&atomic_ops_lock);563563+564564+ qp->resp.atomic_orig = *vaddr;565565+566566+ if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||567567+ pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {568568+ if (*vaddr == atmeth_comp(pkt))569569+ *vaddr = atmeth_swap_add(pkt);570570+ } else {571571+ *vaddr += atmeth_swap_add(pkt);572572+ }573573+574574+ spin_unlock_bh(&atomic_ops_lock);575575+576576+ ret = RESPST_NONE;577577+out:578578+ return ret;579579+}580580+581581+static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,582582+ struct rxe_pkt_info *pkt,583583+ struct rxe_pkt_info *ack,584584+ int opcode,585585+ int payload,586586+ u32 psn,587587+ u8 syndrome,588588+ u32 *crcp)589589+{590590+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);591591+ struct sk_buff *skb;592592+ u32 crc = 0;593593+ u32 *p;594594+ int paylen;595595+ int pad;596596+ int err;597597+598598+ /*599599+ * allocate packet600600+ */601601+ pad = (-payload) & 0x3;602602+ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;603603+604604+ skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack);605605+ if (!skb)606606+ return NULL;607607+608608+ ack->qp = qp;609609+ ack->opcode = opcode;610610+ ack->mask = rxe_opcode[opcode].mask;611611+ ack->offset = pkt->offset;612612+ ack->paylen = paylen;613613+614614+ /* fill in bth using the request packet 
headers */615615+ memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);616616+617617+ bth_set_opcode(ack, opcode);618618+ bth_set_qpn(ack, qp->attr.dest_qp_num);619619+ bth_set_pad(ack, pad);620620+ bth_set_se(ack, 0);621621+ bth_set_psn(ack, psn);622622+ bth_set_ack(ack, 0);623623+ ack->psn = psn;624624+625625+ if (ack->mask & RXE_AETH_MASK) {626626+ aeth_set_syn(ack, syndrome);627627+ aeth_set_msn(ack, qp->resp.msn);628628+ }629629+630630+ if (ack->mask & RXE_ATMACK_MASK)631631+ atmack_set_orig(ack, qp->resp.atomic_orig);632632+633633+ err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc);634634+ if (err) {635635+ kfree_skb(skb);636636+ return NULL;637637+ }638638+639639+ if (crcp) {640640+ /* CRC computation will be continued by the caller */641641+ *crcp = crc;642642+ } else {643643+ p = payload_addr(ack) + payload + bth_pad(ack);644644+ *p = ~crc;645645+ }646646+647647+ return skb;648648+}649649+650650+/* RDMA read response. If res is not NULL, then we have a current RDMA request651651+ * being processed or replayed.652652+ */653653+static enum resp_states read_reply(struct rxe_qp *qp,654654+ struct rxe_pkt_info *req_pkt)655655+{656656+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);657657+ struct rxe_pkt_info ack_pkt;658658+ struct sk_buff *skb;659659+ int mtu = qp->mtu;660660+ enum resp_states state;661661+ int payload;662662+ int opcode;663663+ int err;664664+ struct resp_res *res = qp->resp.res;665665+ u32 icrc;666666+ u32 *p;667667+668668+ if (!res) {669669+ /* This is the first time we process that request. 
Get a670670+ * resource671671+ */672672+ res = &qp->resp.resources[qp->resp.res_head];673673+674674+ free_rd_atomic_resource(qp, res);675675+ rxe_advance_resp_resource(qp);676676+677677+ res->type = RXE_READ_MASK;678678+679679+ res->read.va = qp->resp.va;680680+ res->read.va_org = qp->resp.va;681681+682682+ res->first_psn = req_pkt->psn;683683+ res->last_psn = req_pkt->psn +684684+ (reth_len(req_pkt) + mtu - 1) /685685+ mtu - 1;686686+ res->cur_psn = req_pkt->psn;687687+688688+ res->read.resid = qp->resp.resid;689689+ res->read.length = qp->resp.resid;690690+ res->read.rkey = qp->resp.rkey;691691+692692+ /* note res inherits the reference to mr from qp */693693+ res->read.mr = qp->resp.mr;694694+ qp->resp.mr = NULL;695695+696696+ qp->resp.res = res;697697+ res->state = rdatm_res_state_new;698698+ }699699+700700+ if (res->state == rdatm_res_state_new) {701701+ if (res->read.resid <= mtu)702702+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;703703+ else704704+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;705705+ } else {706706+ if (res->read.resid > mtu)707707+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;708708+ else709709+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;710710+ }711711+712712+ res->state = rdatm_res_state_next;713713+714714+ payload = min_t(int, res->read.resid, mtu);715715+716716+ skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,717717+ res->cur_psn, AETH_ACK_UNLIMITED, &icrc);718718+ if (!skb)719719+ return RESPST_ERR_RNR;720720+721721+ err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),722722+ payload, from_mem_obj, &icrc);723723+ if (err)724724+ pr_err("Failed copying memory\n");725725+726726+ p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);727727+ *p = ~icrc;728728+729729+ err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);730730+ if (err) {731731+ pr_err("Failed sending RDMA reply.\n");732732+ kfree_skb(skb);733733+ return RESPST_ERR_RNR;734734+ }735735+736736+ res->read.va += payload;737737+ 
res->read.resid -= payload;738738+ res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;739739+740740+ if (res->read.resid > 0) {741741+ state = RESPST_DONE;742742+ } else {743743+ qp->resp.res = NULL;744744+ qp->resp.opcode = -1;745745+ qp->resp.psn = res->cur_psn;746746+ state = RESPST_CLEANUP;747747+ }748748+749749+ return state;750750+}751751+752752+/* Executes a new request. A retried request never reach that function (send753753+ * and writes are discarded, and reads and atomics are retried elsewhere.754754+ */755755+static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)756756+{757757+ enum resp_states err;758758+759759+ if (pkt->mask & RXE_SEND_MASK) {760760+ if (qp_type(qp) == IB_QPT_UD ||761761+ qp_type(qp) == IB_QPT_SMI ||762762+ qp_type(qp) == IB_QPT_GSI) {763763+ union rdma_network_hdr hdr;764764+ struct sk_buff *skb = PKT_TO_SKB(pkt);765765+766766+ memset(&hdr, 0, sizeof(hdr));767767+ if (skb->protocol == htons(ETH_P_IP))768768+ memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh));769769+ else if (skb->protocol == htons(ETH_P_IPV6))770770+ memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh));771771+772772+ err = send_data_in(qp, &hdr, sizeof(hdr));773773+ if (err)774774+ return err;775775+ }776776+ err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));777777+ if (err)778778+ return err;779779+ } else if (pkt->mask & RXE_WRITE_MASK) {780780+ err = write_data_in(qp, pkt);781781+ if (err)782782+ return err;783783+ } else if (pkt->mask & RXE_READ_MASK) {784784+ /* For RDMA Read we can increment the msn now. See C9-148. */785785+ qp->resp.msn++;786786+ return RESPST_READ_REPLY;787787+ } else if (pkt->mask & RXE_ATOMIC_MASK) {788788+ err = process_atomic(qp, pkt);789789+ if (err)790790+ return err;791791+ } else792792+ /* Unreachable */793793+ WARN_ON(1);794794+795795+ /* We successfully processed this new request. 
*/796796+ qp->resp.msn++;797797+798798+ /* next expected psn, read handles this separately */799799+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;800800+801801+ qp->resp.opcode = pkt->opcode;802802+ qp->resp.status = IB_WC_SUCCESS;803803+804804+ if (pkt->mask & RXE_COMP_MASK)805805+ return RESPST_COMPLETE;806806+ else if (qp_type(qp) == IB_QPT_RC)807807+ return RESPST_ACKNOWLEDGE;808808+ else809809+ return RESPST_CLEANUP;810810+}811811+812812+static enum resp_states do_complete(struct rxe_qp *qp,813813+ struct rxe_pkt_info *pkt)814814+{815815+ struct rxe_cqe cqe;816816+ struct ib_wc *wc = &cqe.ibwc;817817+ struct ib_uverbs_wc *uwc = &cqe.uibwc;818818+ struct rxe_recv_wqe *wqe = qp->resp.wqe;819819+820820+ if (unlikely(!wqe))821821+ return RESPST_CLEANUP;822822+823823+ memset(&cqe, 0, sizeof(cqe));824824+825825+ wc->wr_id = wqe->wr_id;826826+ wc->status = qp->resp.status;827827+ wc->qp = &qp->ibqp;828828+829829+ /* fields after status are not required for errors */830830+ if (wc->status == IB_WC_SUCCESS) {831831+ wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&832832+ pkt->mask & RXE_WRITE_MASK) ?833833+ IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;834834+ wc->vendor_err = 0;835835+ wc->byte_len = wqe->dma.length - wqe->dma.resid;836836+837837+ /* fields after byte_len are different between kernel and user838838+ * space839839+ */840840+ if (qp->rcq->is_user) {841841+ uwc->wc_flags = IB_WC_GRH;842842+843843+ if (pkt->mask & RXE_IMMDT_MASK) {844844+ uwc->wc_flags |= IB_WC_WITH_IMM;845845+ uwc->ex.imm_data =846846+ (__u32 __force)immdt_imm(pkt);847847+ }848848+849849+ if (pkt->mask & RXE_IETH_MASK) {850850+ uwc->wc_flags |= IB_WC_WITH_INVALIDATE;851851+ uwc->ex.invalidate_rkey = ieth_rkey(pkt);852852+ }853853+854854+ uwc->qp_num = qp->ibqp.qp_num;855855+856856+ if (pkt->mask & RXE_DETH_MASK)857857+ uwc->src_qp = deth_sqp(pkt);858858+859859+ uwc->port_num = qp->attr.port_num;860860+ } else {861861+ struct sk_buff *skb = PKT_TO_SKB(pkt);862862+863863+ wc->wc_flags = IB_WC_GRH | 
IB_WC_WITH_NETWORK_HDR_TYPE;864864+ if (skb->protocol == htons(ETH_P_IP))865865+ wc->network_hdr_type = RDMA_NETWORK_IPV4;866866+ else867867+ wc->network_hdr_type = RDMA_NETWORK_IPV6;868868+869869+ if (pkt->mask & RXE_IMMDT_MASK) {870870+ wc->wc_flags |= IB_WC_WITH_IMM;871871+ wc->ex.imm_data = immdt_imm(pkt);872872+ }873873+874874+ if (pkt->mask & RXE_IETH_MASK) {875875+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);876876+ struct rxe_mem *rmr;877877+878878+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;879879+ wc->ex.invalidate_rkey = ieth_rkey(pkt);880880+881881+ rmr = rxe_pool_get_index(&rxe->mr_pool,882882+ wc->ex.invalidate_rkey >> 8);883883+ if (unlikely(!rmr)) {884884+ pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey);885885+ return RESPST_ERROR;886886+ }887887+ rmr->state = RXE_MEM_STATE_FREE;888888+ }889889+890890+ wc->qp = &qp->ibqp;891891+892892+ if (pkt->mask & RXE_DETH_MASK)893893+ wc->src_qp = deth_sqp(pkt);894894+895895+ wc->port_num = qp->attr.port_num;896896+ }897897+ }898898+899899+ /* have copy for srq and reference for !srq */900900+ if (!qp->srq)901901+ advance_consumer(qp->rq.queue);902902+903903+ qp->resp.wqe = NULL;904904+905905+ if (rxe_cq_post(qp->rcq, &cqe, pkt ? 
bth_se(pkt) : 1))906906+ return RESPST_ERR_CQ_OVERFLOW;907907+908908+ if (qp->resp.state == QP_STATE_ERROR)909909+ return RESPST_CHK_RESOURCE;910910+911911+ if (!pkt)912912+ return RESPST_DONE;913913+ else if (qp_type(qp) == IB_QPT_RC)914914+ return RESPST_ACKNOWLEDGE;915915+ else916916+ return RESPST_CLEANUP;917917+}918918+919919+static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,920920+ u8 syndrome, u32 psn)921921+{922922+ int err = 0;923923+ struct rxe_pkt_info ack_pkt;924924+ struct sk_buff *skb;925925+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);926926+927927+ skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,928928+ 0, psn, syndrome, NULL);929929+ if (!skb) {930930+ err = -ENOMEM;931931+ goto err1;932932+ }933933+934934+ err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);935935+ if (err) {936936+ pr_err_ratelimited("Failed sending ack\n");937937+ kfree_skb(skb);938938+ }939939+940940+err1:941941+ return err;942942+}943943+944944+static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,945945+ u8 syndrome)946946+{947947+ int rc = 0;948948+ struct rxe_pkt_info ack_pkt;949949+ struct sk_buff *skb;950950+ struct sk_buff *skb_copy;951951+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);952952+ struct resp_res *res;953953+954954+ skb = prepare_ack_packet(qp, pkt, &ack_pkt,955955+ IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,956956+ syndrome, NULL);957957+ if (!skb) {958958+ rc = -ENOMEM;959959+ goto out;960960+ }961961+962962+ skb_copy = skb_clone(skb, GFP_ATOMIC);963963+ if (skb_copy)964964+ rxe_add_ref(qp); /* for the new SKB */965965+ else {966966+ pr_warn("Could not clone atomic response\n");967967+ rc = -ENOMEM;968968+ goto out;969969+ }970970+971971+ res = &qp->resp.resources[qp->resp.res_head];972972+ free_rd_atomic_resource(qp, res);973973+ rxe_advance_resp_resource(qp);974974+975975+ res->type = RXE_ATOMIC_MASK;976976+ res->atomic.skb = skb;977977+ res->first_psn = qp->resp.psn;978978+ res->last_psn = 
qp->resp.psn;979979+ res->cur_psn = qp->resp.psn;980980+981981+ rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);982982+ if (rc) {983983+ pr_err_ratelimited("Failed sending ack\n");984984+ rxe_drop_ref(qp);985985+ kfree_skb(skb_copy);986986+ }987987+988988+out:989989+ return rc;990990+}991991+992992+static enum resp_states acknowledge(struct rxe_qp *qp,993993+ struct rxe_pkt_info *pkt)994994+{995995+ if (qp_type(qp) != IB_QPT_RC)996996+ return RESPST_CLEANUP;997997+998998+ if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)999999+ send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);10001000+ else if (pkt->mask & RXE_ATOMIC_MASK)10011001+ send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);10021002+ else if (bth_ack(pkt))10031003+ send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);10041004+10051005+ return RESPST_CLEANUP;10061006+}10071007+10081008+static enum resp_states cleanup(struct rxe_qp *qp,10091009+ struct rxe_pkt_info *pkt)10101010+{10111011+ struct sk_buff *skb;10121012+10131013+ if (pkt) {10141014+ skb = skb_dequeue(&qp->req_pkts);10151015+ rxe_drop_ref(qp);10161016+ kfree_skb(skb);10171017+ }10181018+10191019+ if (qp->resp.mr) {10201020+ rxe_drop_ref(qp->resp.mr);10211021+ qp->resp.mr = NULL;10221022+ }10231023+10241024+ return RESPST_DONE;10251025+}10261026+10271027+static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)10281028+{10291029+ int i;10301030+10311031+ for (i = 0; i < qp->attr.max_rd_atomic; i++) {10321032+ struct resp_res *res = &qp->resp.resources[i];10331033+10341034+ if (res->type == 0)10351035+ continue;10361036+10371037+ if (psn_compare(psn, res->first_psn) >= 0 &&10381038+ psn_compare(psn, res->last_psn) <= 0) {10391039+ return res;10401040+ }10411041+ }10421042+10431043+ return NULL;10441044+}10451045+10461046+static enum resp_states duplicate_request(struct rxe_qp *qp,10471047+ struct rxe_pkt_info *pkt)10481048+{10491049+ enum resp_states rc;10501050+10511051+ if (pkt->mask & RXE_SEND_MASK ||10521052+ pkt->mask & RXE_WRITE_MASK) 
{10531053+ /* SEND. Ack again and cleanup. C9-105. */10541054+ if (bth_ack(pkt))10551055+ send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1);10561056+ rc = RESPST_CLEANUP;10571057+ goto out;10581058+ } else if (pkt->mask & RXE_READ_MASK) {10591059+ struct resp_res *res;10601060+10611061+ res = find_resource(qp, pkt->psn);10621062+ if (!res) {10631063+ /* Resource not found. Class D error. Drop the10641064+ * request.10651065+ */10661066+ rc = RESPST_CLEANUP;10671067+ goto out;10681068+ } else {10691069+ /* Ensure this new request is the same as the previous10701070+ * one or a subset of it.10711071+ */10721072+ u64 iova = reth_va(pkt);10731073+ u32 resid = reth_len(pkt);10741074+10751075+ if (iova < res->read.va_org ||10761076+ resid > res->read.length ||10771077+ (iova + resid) > (res->read.va_org +10781078+ res->read.length)) {10791079+ rc = RESPST_CLEANUP;10801080+ goto out;10811081+ }10821082+10831083+ if (reth_rkey(pkt) != res->read.rkey) {10841084+ rc = RESPST_CLEANUP;10851085+ goto out;10861086+ }10871087+10881088+ res->cur_psn = pkt->psn;10891089+ res->state = (pkt->psn == res->first_psn) ?10901090+ rdatm_res_state_new :10911091+ rdatm_res_state_replay;10921092+10931093+ /* Reset the resource, except length. */10941094+ res->read.va_org = iova;10951095+ res->read.va = iova;10961096+ res->read.resid = resid;10971097+10981098+ /* Replay the RDMA read reply. */10991099+ qp->resp.res = res;11001100+ rc = RESPST_READ_REPLY;11011101+ goto out;11021102+ }11031103+ } else {11041104+ struct resp_res *res;11051105+11061106+ /* Find the operation in our list of responder resources. 
*/11071107+ res = find_resource(qp, pkt->psn);11081108+ if (res) {11091109+ struct sk_buff *skb_copy;11101110+11111111+ skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);11121112+ if (skb_copy) {11131113+ rxe_add_ref(qp); /* for the new SKB */11141114+ } else {11151115+ pr_warn("Couldn't clone atomic resp\n");11161116+ rc = RESPST_CLEANUP;11171117+ goto out;11181118+ }11191119+ bth_set_psn(SKB_TO_PKT(skb_copy),11201120+ qp->resp.psn - 1);11211121+ /* Resend the result. */11221122+ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,11231123+ pkt, skb_copy);11241124+ if (rc) {11251125+ pr_err("Failed resending result. This flow is not handled - skb ignored\n");11261126+ kfree_skb(skb_copy);11271127+ rc = RESPST_CLEANUP;11281128+ goto out;11291129+ }11301130+ }11311131+11321132+ /* Resource not found. Class D error. Drop the request. */11331133+ rc = RESPST_CLEANUP;11341134+ goto out;11351135+ }11361136+out:11371137+ return rc;11381138+}11391139+11401140+/* Process a class A or C. Both are treated the same in this implementation. */11411141+static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,11421142+ enum ib_wc_status status)11431143+{11441144+ qp->resp.aeth_syndrome = syndrome;11451145+ qp->resp.status = status;11461146+11471147+ /* indicate that we should go through the ERROR state */11481148+ qp->resp.goto_error = 1;11491149+}11501150+11511151+static enum resp_states do_class_d1e_error(struct rxe_qp *qp)11521152+{11531153+ /* UC */11541154+ if (qp->srq) {11551155+ /* Class E */11561156+ qp->resp.drop_msg = 1;11571157+ if (qp->resp.wqe) {11581158+ qp->resp.status = IB_WC_REM_INV_REQ_ERR;11591159+ return RESPST_COMPLETE;11601160+ } else {11611161+ return RESPST_CLEANUP;11621162+ }11631163+ } else {11641164+ /* Class D1. This packet may be the start of a11651165+ * new message and could be valid. The previous11661166+ * message is invalid and ignored. 
reset the11671167+ * recv wr to its original state11681168+ */11691169+ if (qp->resp.wqe) {11701170+ qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;11711171+ qp->resp.wqe->dma.cur_sge = 0;11721172+ qp->resp.wqe->dma.sge_offset = 0;11731173+ qp->resp.opcode = -1;11741174+ }11751175+11761176+ if (qp->resp.mr) {11771177+ rxe_drop_ref(qp->resp.mr);11781178+ qp->resp.mr = NULL;11791179+ }11801180+11811181+ return RESPST_CLEANUP;11821182+ }11831183+}11841184+11851185+int rxe_responder(void *arg)11861186+{11871187+ struct rxe_qp *qp = (struct rxe_qp *)arg;11881188+ enum resp_states state;11891189+ struct rxe_pkt_info *pkt = NULL;11901190+ int ret = 0;11911191+11921192+ qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;11931193+11941194+ if (!qp->valid) {11951195+ ret = -EINVAL;11961196+ goto done;11971197+ }11981198+11991199+ switch (qp->resp.state) {12001200+ case QP_STATE_RESET:12011201+ state = RESPST_RESET;12021202+ break;12031203+12041204+ default:12051205+ state = RESPST_GET_REQ;12061206+ break;12071207+ }12081208+12091209+ while (1) {12101210+ pr_debug("state = %s\n", resp_state_name[state]);12111211+ switch (state) {12121212+ case RESPST_GET_REQ:12131213+ state = get_req(qp, &pkt);12141214+ break;12151215+ case RESPST_CHK_PSN:12161216+ state = check_psn(qp, pkt);12171217+ break;12181218+ case RESPST_CHK_OP_SEQ:12191219+ state = check_op_seq(qp, pkt);12201220+ break;12211221+ case RESPST_CHK_OP_VALID:12221222+ state = check_op_valid(qp, pkt);12231223+ break;12241224+ case RESPST_CHK_RESOURCE:12251225+ state = check_resource(qp, pkt);12261226+ break;12271227+ case RESPST_CHK_LENGTH:12281228+ state = check_length(qp, pkt);12291229+ break;12301230+ case RESPST_CHK_RKEY:12311231+ state = check_rkey(qp, pkt);12321232+ break;12331233+ case RESPST_EXECUTE:12341234+ state = execute(qp, pkt);12351235+ break;12361236+ case RESPST_COMPLETE:12371237+ state = do_complete(qp, pkt);12381238+ break;12391239+ case RESPST_READ_REPLY:12401240+ state = read_reply(qp, pkt);12411241+ 
break;12421242+ case RESPST_ACKNOWLEDGE:12431243+ state = acknowledge(qp, pkt);12441244+ break;12451245+ case RESPST_CLEANUP:12461246+ state = cleanup(qp, pkt);12471247+ break;12481248+ case RESPST_DUPLICATE_REQUEST:12491249+ state = duplicate_request(qp, pkt);12501250+ break;12511251+ case RESPST_ERR_PSN_OUT_OF_SEQ:12521252+ /* RC only - Class B. Drop packet. */12531253+ send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);12541254+ state = RESPST_CLEANUP;12551255+ break;12561256+12571257+ case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:12581258+ case RESPST_ERR_MISSING_OPCODE_FIRST:12591259+ case RESPST_ERR_MISSING_OPCODE_LAST_C:12601260+ case RESPST_ERR_UNSUPPORTED_OPCODE:12611261+ case RESPST_ERR_MISALIGNED_ATOMIC:12621262+ /* RC Only - Class C. */12631263+ do_class_ac_error(qp, AETH_NAK_INVALID_REQ,12641264+ IB_WC_REM_INV_REQ_ERR);12651265+ state = RESPST_COMPLETE;12661266+ break;12671267+12681268+ case RESPST_ERR_MISSING_OPCODE_LAST_D1E:12691269+ state = do_class_d1e_error(qp);12701270+ break;12711271+ case RESPST_ERR_RNR:12721272+ if (qp_type(qp) == IB_QPT_RC) {12731273+ /* RC - class B */12741274+ send_ack(qp, pkt, AETH_RNR_NAK |12751275+ (~AETH_TYPE_MASK &12761276+ qp->attr.min_rnr_timer),12771277+ pkt->psn);12781278+ } else {12791279+ /* UD/UC - class D */12801280+ qp->resp.drop_msg = 1;12811281+ }12821282+ state = RESPST_CLEANUP;12831283+ break;12841284+12851285+ case RESPST_ERR_RKEY_VIOLATION:12861286+ if (qp_type(qp) == IB_QPT_RC) {12871287+ /* Class C */12881288+ do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,12891289+ IB_WC_REM_ACCESS_ERR);12901290+ state = RESPST_COMPLETE;12911291+ } else {12921292+ qp->resp.drop_msg = 1;12931293+ if (qp->srq) {12941294+ /* UC/SRQ Class D */12951295+ qp->resp.status = IB_WC_REM_ACCESS_ERR;12961296+ state = RESPST_COMPLETE;12971297+ } else {12981298+ /* UC/non-SRQ Class E. 
*/12991299+ state = RESPST_CLEANUP;13001300+ }13011301+ }13021302+ break;13031303+13041304+ case RESPST_ERR_LENGTH:13051305+ if (qp_type(qp) == IB_QPT_RC) {13061306+ /* Class C */13071307+ do_class_ac_error(qp, AETH_NAK_INVALID_REQ,13081308+ IB_WC_REM_INV_REQ_ERR);13091309+ state = RESPST_COMPLETE;13101310+ } else if (qp->srq) {13111311+ /* UC/UD - class E */13121312+ qp->resp.status = IB_WC_REM_INV_REQ_ERR;13131313+ state = RESPST_COMPLETE;13141314+ } else {13151315+ /* UC/UD - class D */13161316+ qp->resp.drop_msg = 1;13171317+ state = RESPST_CLEANUP;13181318+ }13191319+ break;13201320+13211321+ case RESPST_ERR_MALFORMED_WQE:13221322+ /* All, Class A. */13231323+ do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,13241324+ IB_WC_LOC_QP_OP_ERR);13251325+ state = RESPST_COMPLETE;13261326+ break;13271327+13281328+ case RESPST_ERR_CQ_OVERFLOW:13291329+ /* All - Class G */13301330+ state = RESPST_ERROR;13311331+ break;13321332+13331333+ case RESPST_DONE:13341334+ if (qp->resp.goto_error) {13351335+ state = RESPST_ERROR;13361336+ break;13371337+ }13381338+13391339+ goto done;13401340+13411341+ case RESPST_EXIT:13421342+ if (qp->resp.goto_error) {13431343+ state = RESPST_ERROR;13441344+ break;13451345+ }13461346+13471347+ goto exit;13481348+13491349+ case RESPST_RESET: {13501350+ struct sk_buff *skb;13511351+13521352+ while ((skb = skb_dequeue(&qp->req_pkts))) {13531353+ rxe_drop_ref(qp);13541354+ kfree_skb(skb);13551355+ }13561356+13571357+ while (!qp->srq && qp->rq.queue &&13581358+ queue_head(qp->rq.queue))13591359+ advance_consumer(qp->rq.queue);13601360+13611361+ qp->resp.wqe = NULL;13621362+ goto exit;13631363+ }13641364+13651365+ case RESPST_ERROR:13661366+ qp->resp.goto_error = 0;13671367+ pr_warn("qp#%d moved to error state\n", qp_num(qp));13681368+ rxe_qp_error(qp);13691369+ goto exit;13701370+13711371+ default:13721372+ WARN_ON(1);13731373+ }13741374+ }13751375+13761376+exit:13771377+ ret = -EAGAIN;13781378+done:13791379+ return ret;13801380+}
+193
drivers/infiniband/sw/rxe/rxe_srq.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+#include "rxe_queue.h"3737+3838+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,3939+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)4040+{4141+ if (srq && srq->error) {4242+ pr_warn("srq in error state\n");4343+ goto err1;4444+ }4545+4646+ if (mask & IB_SRQ_MAX_WR) {4747+ if (attr->max_wr > rxe->attr.max_srq_wr) {4848+ pr_warn("max_wr(%d) > max_srq_wr(%d)\n",4949+ attr->max_wr, rxe->attr.max_srq_wr);5050+ goto err1;5151+ }5252+5353+ if (attr->max_wr <= 0) {5454+ pr_warn("max_wr(%d) <= 0\n", attr->max_wr);5555+ goto err1;5656+ }5757+5858+ if (srq && srq->limit && (attr->max_wr < srq->limit)) {5959+ pr_warn("max_wr (%d) < srq->limit (%d)\n",6060+ attr->max_wr, srq->limit);6161+ goto err1;6262+ }6363+6464+ if (attr->max_wr < RXE_MIN_SRQ_WR)6565+ attr->max_wr = RXE_MIN_SRQ_WR;6666+ }6767+6868+ if (mask & IB_SRQ_LIMIT) {6969+ if (attr->srq_limit > rxe->attr.max_srq_wr) {7070+ pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",7171+ attr->srq_limit, rxe->attr.max_srq_wr);7272+ goto err1;7373+ }7474+7575+ if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {7676+ pr_warn("srq_limit (%d) > cur limit(%d)\n",7777+ attr->srq_limit,7878+ srq->rq.queue->buf->index_mask);7979+ goto err1;8080+ }8181+ }8282+8383+ if (mask == IB_SRQ_INIT_MASK) {8484+ if (attr->max_sge > rxe->attr.max_srq_sge) {8585+ pr_warn("max_sge(%d) > max_srq_sge(%d)\n",8686+ attr->max_sge, rxe->attr.max_srq_sge);8787+ goto err1;8888+ }8989+9090+ if (attr->max_sge < RXE_MIN_SRQ_SGE)9191+ attr->max_sge = RXE_MIN_SRQ_SGE;9292+ }9393+9494+ return 0;9595+9696+err1:9797+ return -EINVAL;9898+}9999+100100+int rxe_srq_from_init(struct rxe_dev *rxe, 
struct rxe_srq *srq,101101+ struct ib_srq_init_attr *init,102102+ struct ib_ucontext *context, struct ib_udata *udata)103103+{104104+ int err;105105+ int srq_wqe_size;106106+ struct rxe_queue *q;107107+108108+ srq->ibsrq.event_handler = init->event_handler;109109+ srq->ibsrq.srq_context = init->srq_context;110110+ srq->limit = init->attr.srq_limit;111111+ srq->srq_num = srq->pelem.index;112112+ srq->rq.max_wr = init->attr.max_wr;113113+ srq->rq.max_sge = init->attr.max_sge;114114+115115+ srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);116116+117117+ spin_lock_init(&srq->rq.producer_lock);118118+ spin_lock_init(&srq->rq.consumer_lock);119119+120120+ q = rxe_queue_init(rxe, &srq->rq.max_wr,121121+ srq_wqe_size);122122+ if (!q) {123123+ pr_warn("unable to allocate queue for srq\n");124124+ return -ENOMEM;125125+ }126126+127127+ srq->rq.queue = q;128128+129129+ err = do_mmap_info(rxe, udata, false, context, q->buf,130130+ q->buf_size, &q->ip);131131+ if (err)132132+ return err;133133+134134+ if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) {135135+ if (copy_to_user(udata->outbuf + sizeof(struct mminfo),136136+ &srq->srq_num, sizeof(u32)))137137+ return -EFAULT;138138+ }139139+ return 0;140140+}141141+142142+int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,143143+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,144144+ struct ib_udata *udata)145145+{146146+ int err;147147+ struct rxe_queue *q = srq->rq.queue;148148+ struct mminfo mi = { .offset = 1, .size = 0};149149+150150+ if (mask & IB_SRQ_MAX_WR) {151151+ /* Check that we can write the mminfo struct to user space */152152+ if (udata && udata->inlen >= sizeof(__u64)) {153153+ __u64 mi_addr;154154+155155+ /* Get address of user space mminfo struct */156156+ err = ib_copy_from_udata(&mi_addr, udata,157157+ sizeof(mi_addr));158158+ if (err)159159+ goto err1;160160+161161+ udata->outbuf = (void __user *)(unsigned long)mi_addr;162162+ udata->outlen = sizeof(mi);163163+164164+ if 
(!access_ok(VERIFY_WRITE,165165+ (void __user *)udata->outbuf,166166+ udata->outlen)) {167167+ err = -EFAULT;168168+ goto err1;169169+ }170170+ }171171+172172+ err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,173173+ rcv_wqe_size(srq->rq.max_sge),174174+ srq->rq.queue->ip ?175175+ srq->rq.queue->ip->context :176176+ NULL,177177+ udata, &srq->rq.producer_lock,178178+ &srq->rq.consumer_lock);179179+ if (err)180180+ goto err2;181181+ }182182+183183+ if (mask & IB_SRQ_LIMIT)184184+ srq->limit = attr->srq_limit;185185+186186+ return 0;187187+188188+err2:189189+ rxe_queue_cleanup(q);190190+ srq->rq.queue = NULL;191191+err1:192192+ return err;193193+}
+157
drivers/infiniband/sw/rxe/rxe_sysfs.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_net.h"3636+3737+/* Copy argument and remove trailing CR. Return the new length. */3838+static int sanitize_arg(const char *val, char *intf, int intf_len)3939+{4040+ int len;4141+4242+ if (!val)4343+ return 0;4444+4545+ /* Remove newline. 
*/4646+ for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)4747+ intf[len] = val[len];4848+ intf[len] = 0;4949+5050+ if (len == 0 || (val[len] != 0 && val[len] != '\n'))5151+ return 0;5252+5353+ return len;5454+}5555+5656+static void rxe_set_port_state(struct net_device *ndev)5757+{5858+ struct rxe_dev *rxe = net_to_rxe(ndev);5959+ bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);6060+6161+ if (!rxe)6262+ goto out;6363+6464+ if (is_up)6565+ rxe_port_up(rxe);6666+ else6767+ rxe_port_down(rxe); /* down for unknown state */6868+out:6969+ return;7070+}7171+7272+static int rxe_param_set_add(const char *val, const struct kernel_param *kp)7373+{7474+ int len;7575+ int err = 0;7676+ char intf[32];7777+ struct net_device *ndev = NULL;7878+ struct rxe_dev *rxe;7979+8080+ len = sanitize_arg(val, intf, sizeof(intf));8181+ if (!len) {8282+ pr_err("rxe: add: invalid interface name\n");8383+ err = -EINVAL;8484+ goto err;8585+ }8686+8787+ ndev = dev_get_by_name(&init_net, intf);8888+ if (!ndev) {8989+ pr_err("interface %s not found\n", intf);9090+ err = -EINVAL;9191+ goto err;9292+ }9393+9494+ if (net_to_rxe(ndev)) {9595+ pr_err("rxe: already configured on %s\n", intf);9696+ err = -EINVAL;9797+ goto err;9898+ }9999+100100+ rxe = rxe_net_add(ndev);101101+ if (!rxe) {102102+ pr_err("rxe: failed to add %s\n", intf);103103+ err = -EINVAL;104104+ goto err;105105+ }106106+107107+ rxe_set_port_state(ndev);108108+ pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf);109109+err:110110+ if (ndev)111111+ dev_put(ndev);112112+ return err;113113+}114114+115115+static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)116116+{117117+ int len;118118+ char intf[32];119119+ struct rxe_dev *rxe;120120+121121+ len = sanitize_arg(val, intf, sizeof(intf));122122+ if (!len) {123123+ pr_err("rxe: add: invalid interface name\n");124124+ return -EINVAL;125125+ }126126+127127+ if (strncmp("all", intf, len) == 0) {128128+ pr_info("rxe_sys: remove 
all");129129+ rxe_remove_all();130130+ return 0;131131+ }132132+133133+ rxe = get_rxe_by_name(intf);134134+135135+ if (!rxe) {136136+ pr_err("rxe: not configured on %s\n", intf);137137+ return -EINVAL;138138+ }139139+140140+ list_del(&rxe->list);141141+ rxe_remove(rxe);142142+143143+ return 0;144144+}145145+146146+static const struct kernel_param_ops rxe_add_ops = {147147+ .set = rxe_param_set_add,148148+};149149+150150+static const struct kernel_param_ops rxe_remove_ops = {151151+ .set = rxe_param_set_remove,152152+};153153+154154+module_param_cb(add, &rxe_add_ops, NULL, 0200);155155+MODULE_PARM_DESC(add, "Create RXE device over network interface");156156+module_param_cb(remove, &rxe_remove_ops, NULL, 0200);157157+MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
+154
drivers/infiniband/sw/rxe/rxe_task.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include <linux/kernel.h>3535+#include <linux/interrupt.h>3636+#include <linux/hardirq.h>3737+3838+#include "rxe_task.h"3939+4040+int __rxe_do_task(struct rxe_task *task)4141+4242+{4343+ int ret;4444+4545+ while ((ret = task->func(task->arg)) == 0)4646+ ;4747+4848+ task->ret = ret;4949+5050+ return ret;5151+}5252+5353+/*5454+ * this locking is due to a potential race where5555+ * a second caller finds the task already running5656+ * but looks just after the last call to func5757+ */5858+void rxe_do_task(unsigned long data)5959+{6060+ int cont;6161+ int ret;6262+ unsigned long flags;6363+ struct rxe_task *task = (struct rxe_task *)data;6464+6565+ spin_lock_irqsave(&task->state_lock, flags);6666+ switch (task->state) {6767+ case TASK_STATE_START:6868+ task->state = TASK_STATE_BUSY;6969+ spin_unlock_irqrestore(&task->state_lock, flags);7070+ break;7171+7272+ case TASK_STATE_BUSY:7373+ task->state = TASK_STATE_ARMED;7474+ /* fall through to */7575+ case TASK_STATE_ARMED:7676+ spin_unlock_irqrestore(&task->state_lock, flags);7777+ return;7878+7979+ default:8080+ spin_unlock_irqrestore(&task->state_lock, flags);8181+ pr_warn("bad state = %d in rxe_do_task\n", task->state);8282+ return;8383+ }8484+8585+ do {8686+ cont = 0;8787+ ret = task->func(task->arg);8888+8989+ spin_lock_irqsave(&task->state_lock, flags);9090+ switch (task->state) {9191+ case TASK_STATE_BUSY:9292+ if (ret)9393+ task->state = TASK_STATE_START;9494+ else9595+ cont = 1;9696+ break;9797+9898+ /* soneone tried to run the task since the last time we called9999+ * func, so we will call one more time regardless of the100100+ * return value101101+ */102102+ case TASK_STATE_ARMED:103103+ task->state = 
TASK_STATE_BUSY;104104+ cont = 1;105105+ break;106106+107107+ default:108108+ pr_warn("bad state = %d in rxe_do_task\n",109109+ task->state);110110+ }111111+ spin_unlock_irqrestore(&task->state_lock, flags);112112+ } while (cont);113113+114114+ task->ret = ret;115115+}116116+117117+int rxe_init_task(void *obj, struct rxe_task *task,118118+ void *arg, int (*func)(void *), char *name)119119+{120120+ task->obj = obj;121121+ task->arg = arg;122122+ task->func = func;123123+ snprintf(task->name, sizeof(task->name), "%s", name);124124+125125+ tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);126126+127127+ task->state = TASK_STATE_START;128128+ spin_lock_init(&task->state_lock);129129+130130+ return 0;131131+}132132+133133+void rxe_cleanup_task(struct rxe_task *task)134134+{135135+ tasklet_kill(&task->tasklet);136136+}137137+138138+void rxe_run_task(struct rxe_task *task, int sched)139139+{140140+ if (sched)141141+ tasklet_schedule(&task->tasklet);142142+ else143143+ rxe_do_task((unsigned long)task);144144+}145145+146146+void rxe_disable_task(struct rxe_task *task)147147+{148148+ tasklet_disable(&task->tasklet);149149+}150150+151151+void rxe_enable_task(struct rxe_task *task)152152+{153153+ tasklet_enable(&task->tasklet);154154+}
+95
drivers/infiniband/sw/rxe/rxe_task.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_TASK_H3535+#define RXE_TASK_H3636+3737+enum {3838+ TASK_STATE_START = 0,3939+ TASK_STATE_BUSY = 1,4040+ TASK_STATE_ARMED = 2,4141+};4242+4343+/*4444+ * data structure to describe a 'task' which is a short4545+ * function that returns 0 as long as it needs to be4646+ * called again.4747+ */4848+struct rxe_task {4949+ void *obj;5050+ struct tasklet_struct tasklet;5151+ int state;5252+ spinlock_t state_lock; /* spinlock for task state */5353+ void *arg;5454+ int (*func)(void *arg);5555+ int ret;5656+ char name[16];5757+};5858+5959+/*6060+ * init rxe_task structure6161+ * arg => parameter to pass to fcn6262+ * fcn => function to call until it returns != 06363+ */6464+int rxe_init_task(void *obj, struct rxe_task *task,6565+ void *arg, int (*func)(void *), char *name);6666+6767+/* cleanup task */6868+void rxe_cleanup_task(struct rxe_task *task);6969+7070+/*7171+ * raw call to func in loop without any checking7272+ * can call when tasklets are disabled7373+ */7474+int __rxe_do_task(struct rxe_task *task);7575+7676+/*7777+ * common function called by any of the main tasklets7878+ * If there is any chance that there is additional7979+ * work to do someone must reschedule the task before8080+ * leaving8181+ */8282+void rxe_do_task(unsigned long data);8383+8484+/* run a task, else schedule it to run as a tasklet, The decision8585+ * to run or schedule tasklet is based on the parameter sched.8686+ */8787+void rxe_run_task(struct rxe_task *task, int sched);8888+8989+/* keep a task from scheduling */9090+void rxe_disable_task(struct rxe_task *task);9191+9292+/* allow task to run */9393+void rxe_enable_task(struct rxe_task *task);9494+9595+#endif /* RXE_TASK_H */
+1330
drivers/infiniband/sw/rxe/rxe_verbs.c
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#include "rxe.h"3535+#include "rxe_loc.h"3636+#include "rxe_queue.h"3737+3838+static int rxe_query_device(struct ib_device *dev,3939+ struct ib_device_attr *attr,4040+ struct ib_udata *uhw)4141+{4242+ struct rxe_dev *rxe = to_rdev(dev);4343+4444+ if (uhw->inlen || uhw->outlen)4545+ return -EINVAL;4646+4747+ *attr = rxe->attr;4848+ return 0;4949+}5050+5151+static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,5252+ u8 *active_width)5353+{5454+ if (speed <= 1000) {5555+ *active_width = IB_WIDTH_1X;5656+ *active_speed = IB_SPEED_SDR;5757+ } else if (speed <= 10000) {5858+ *active_width = IB_WIDTH_1X;5959+ *active_speed = IB_SPEED_FDR10;6060+ } else if (speed <= 20000) {6161+ *active_width = IB_WIDTH_4X;6262+ *active_speed = IB_SPEED_DDR;6363+ } else if (speed <= 30000) {6464+ *active_width = IB_WIDTH_4X;6565+ *active_speed = IB_SPEED_QDR;6666+ } else if (speed <= 40000) {6767+ *active_width = IB_WIDTH_4X;6868+ *active_speed = IB_SPEED_FDR10;6969+ } else {7070+ *active_width = IB_WIDTH_4X;7171+ *active_speed = IB_SPEED_EDR;7272+ }7373+}7474+7575+static int rxe_query_port(struct ib_device *dev,7676+ u8 port_num, struct ib_port_attr *attr)7777+{7878+ struct rxe_dev *rxe = to_rdev(dev);7979+ struct rxe_port *port;8080+ u32 speed;8181+8282+ if (unlikely(port_num != 1)) {8383+ pr_warn("invalid port_number %d\n", port_num);8484+ goto err1;8585+ }8686+8787+ port = &rxe->port;8888+8989+ *attr = port->attr;9090+9191+ mutex_lock(&rxe->usdev_lock);9292+ if (rxe->ndev->ethtool_ops->get_link_ksettings) {9393+ struct ethtool_link_ksettings ks;9494+9595+ rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);9696+ speed = ks.base.speed;9797+ } else if 
(rxe->ndev->ethtool_ops->get_settings) {9898+ struct ethtool_cmd cmd;9999+100100+ rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);101101+ speed = cmd.speed;102102+ } else {103103+ pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name);104104+ speed = 1000;105105+ }106106+ rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width);107107+ mutex_unlock(&rxe->usdev_lock);108108+109109+ return 0;110110+111111+err1:112112+ return -EINVAL;113113+}114114+115115+static int rxe_query_gid(struct ib_device *device,116116+ u8 port_num, int index, union ib_gid *gid)117117+{118118+ int ret;119119+120120+ if (index > RXE_PORT_GID_TBL_LEN)121121+ return -EINVAL;122122+123123+ ret = ib_get_cached_gid(device, port_num, index, gid, NULL);124124+ if (ret == -EAGAIN) {125125+ memcpy(gid, &zgid, sizeof(*gid));126126+ return 0;127127+ }128128+129129+ return ret;130130+}131131+132132+static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int133133+ index, const union ib_gid *gid,134134+ const struct ib_gid_attr *attr, void **context)135135+{136136+ if (index >= RXE_PORT_GID_TBL_LEN)137137+ return -EINVAL;138138+ return 0;139139+}140140+141141+static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int142142+ index, void **context)143143+{144144+ if (index >= RXE_PORT_GID_TBL_LEN)145145+ return -EINVAL;146146+ return 0;147147+}148148+149149+static struct net_device *rxe_get_netdev(struct ib_device *device,150150+ u8 port_num)151151+{152152+ struct rxe_dev *rxe = to_rdev(device);153153+154154+ if (rxe->ndev) {155155+ dev_hold(rxe->ndev);156156+ return rxe->ndev;157157+ }158158+159159+ return NULL;160160+}161161+162162+static int rxe_query_pkey(struct ib_device *device,163163+ u8 port_num, u16 index, u16 *pkey)164164+{165165+ struct rxe_dev *rxe = to_rdev(device);166166+ struct rxe_port *port;167167+168168+ if (unlikely(port_num != 1)) {169169+ dev_warn(device->dma_device, "invalid port_num = %d\n",170170+ port_num);171171+ 
goto err1;172172+ }173173+174174+ port = &rxe->port;175175+176176+ if (unlikely(index >= port->attr.pkey_tbl_len)) {177177+ dev_warn(device->dma_device, "invalid index = %d\n",178178+ index);179179+ goto err1;180180+ }181181+182182+ *pkey = port->pkey_tbl[index];183183+ return 0;184184+185185+err1:186186+ return -EINVAL;187187+}188188+189189+static int rxe_modify_device(struct ib_device *dev,190190+ int mask, struct ib_device_modify *attr)191191+{192192+ struct rxe_dev *rxe = to_rdev(dev);193193+194194+ if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)195195+ rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);196196+197197+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {198198+ memcpy(rxe->ib_dev.node_desc,199199+ attr->node_desc, sizeof(rxe->ib_dev.node_desc));200200+ }201201+202202+ return 0;203203+}204204+205205+static int rxe_modify_port(struct ib_device *dev,206206+ u8 port_num, int mask, struct ib_port_modify *attr)207207+{208208+ struct rxe_dev *rxe = to_rdev(dev);209209+ struct rxe_port *port;210210+211211+ if (unlikely(port_num != 1)) {212212+ pr_warn("invalid port_num = %d\n", port_num);213213+ goto err1;214214+ }215215+216216+ port = &rxe->port;217217+218218+ port->attr.port_cap_flags |= attr->set_port_cap_mask;219219+ port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;220220+221221+ if (mask & IB_PORT_RESET_QKEY_CNTR)222222+ port->attr.qkey_viol_cntr = 0;223223+224224+ return 0;225225+226226+err1:227227+ return -EINVAL;228228+}229229+230230+static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,231231+ u8 port_num)232232+{233233+ struct rxe_dev *rxe = to_rdev(dev);234234+235235+ return rxe->ifc_ops->link_layer(rxe, port_num);236236+}237237+238238+static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,239239+ struct ib_udata *udata)240240+{241241+ struct rxe_dev *rxe = to_rdev(dev);242242+ struct rxe_ucontext *uc;243243+244244+ uc = rxe_alloc(&rxe->uc_pool);245245+ return uc ? 
&uc->ibuc : ERR_PTR(-ENOMEM);246246+}247247+248248+static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)249249+{250250+ struct rxe_ucontext *uc = to_ruc(ibuc);251251+252252+ rxe_drop_ref(uc);253253+ return 0;254254+}255255+256256+static int rxe_port_immutable(struct ib_device *dev, u8 port_num,257257+ struct ib_port_immutable *immutable)258258+{259259+ int err;260260+ struct ib_port_attr attr;261261+262262+ err = rxe_query_port(dev, port_num, &attr);263263+ if (err)264264+ return err;265265+266266+ immutable->pkey_tbl_len = attr.pkey_tbl_len;267267+ immutable->gid_tbl_len = attr.gid_tbl_len;268268+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;269269+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;270270+271271+ return 0;272272+}273273+274274+static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,275275+ struct ib_ucontext *context,276276+ struct ib_udata *udata)277277+{278278+ struct rxe_dev *rxe = to_rdev(dev);279279+ struct rxe_pd *pd;280280+281281+ pd = rxe_alloc(&rxe->pd_pool);282282+ return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);283283+}284284+285285+static int rxe_dealloc_pd(struct ib_pd *ibpd)286286+{287287+ struct rxe_pd *pd = to_rpd(ibpd);288288+289289+ rxe_drop_ref(pd);290290+ return 0;291291+}292292+293293+static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,294294+ struct rxe_av *av)295295+{296296+ int err;297297+ union ib_gid sgid;298298+ struct ib_gid_attr sgid_attr;299299+300300+ err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,301301+ attr->grh.sgid_index, &sgid,302302+ &sgid_attr);303303+ if (err) {304304+ pr_err("Failed to query sgid. 
err = %d\n", err);305305+ return err;306306+ }307307+308308+ err = rxe_av_from_attr(rxe, attr->port_num, av, attr);309309+ if (!err)310310+ err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);311311+312312+ if (sgid_attr.ndev)313313+ dev_put(sgid_attr.ndev);314314+ return err;315315+}316316+317317+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)318318+{319319+ int err;320320+ struct rxe_dev *rxe = to_rdev(ibpd->device);321321+ struct rxe_pd *pd = to_rpd(ibpd);322322+ struct rxe_ah *ah;323323+324324+ err = rxe_av_chk_attr(rxe, attr);325325+ if (err)326326+ goto err1;327327+328328+ ah = rxe_alloc(&rxe->ah_pool);329329+ if (!ah) {330330+ err = -ENOMEM;331331+ goto err1;332332+ }333333+334334+ rxe_add_ref(pd);335335+ ah->pd = pd;336336+337337+ err = rxe_init_av(rxe, attr, &ah->av);338338+ if (err)339339+ goto err2;340340+341341+ return &ah->ibah;342342+343343+err2:344344+ rxe_drop_ref(pd);345345+ rxe_drop_ref(ah);346346+err1:347347+ return ERR_PTR(err);348348+}349349+350350+static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)351351+{352352+ int err;353353+ struct rxe_dev *rxe = to_rdev(ibah->device);354354+ struct rxe_ah *ah = to_rah(ibah);355355+356356+ err = rxe_av_chk_attr(rxe, attr);357357+ if (err)358358+ return err;359359+360360+ err = rxe_init_av(rxe, attr, &ah->av);361361+ if (err)362362+ return err;363363+364364+ return 0;365365+}366366+367367+static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)368368+{369369+ struct rxe_dev *rxe = to_rdev(ibah->device);370370+ struct rxe_ah *ah = to_rah(ibah);371371+372372+ rxe_av_to_attr(rxe, &ah->av, attr);373373+ return 0;374374+}375375+376376+static int rxe_destroy_ah(struct ib_ah *ibah)377377+{378378+ struct rxe_ah *ah = to_rah(ibah);379379+380380+ rxe_drop_ref(ah->pd);381381+ rxe_drop_ref(ah);382382+ return 0;383383+}384384+385385+static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)386386+{387387+ int err;388388+ int i;389389+ u32 
length;390390+ struct rxe_recv_wqe *recv_wqe;391391+ int num_sge = ibwr->num_sge;392392+393393+ if (unlikely(queue_full(rq->queue))) {394394+ err = -ENOMEM;395395+ goto err1;396396+ }397397+398398+ if (unlikely(num_sge > rq->max_sge)) {399399+ err = -EINVAL;400400+ goto err1;401401+ }402402+403403+ length = 0;404404+ for (i = 0; i < num_sge; i++)405405+ length += ibwr->sg_list[i].length;406406+407407+ recv_wqe = producer_addr(rq->queue);408408+ recv_wqe->wr_id = ibwr->wr_id;409409+ recv_wqe->num_sge = num_sge;410410+411411+ memcpy(recv_wqe->dma.sge, ibwr->sg_list,412412+ num_sge * sizeof(struct ib_sge));413413+414414+ recv_wqe->dma.length = length;415415+ recv_wqe->dma.resid = length;416416+ recv_wqe->dma.num_sge = num_sge;417417+ recv_wqe->dma.cur_sge = 0;418418+ recv_wqe->dma.sge_offset = 0;419419+420420+ /* make sure all changes to the work queue are written before we421421+ * update the producer pointer422422+ */423423+ smp_wmb();424424+425425+ advance_producer(rq->queue);426426+ return 0;427427+428428+err1:429429+ return err;430430+}431431+432432+static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,433433+ struct ib_srq_init_attr *init,434434+ struct ib_udata *udata)435435+{436436+ int err;437437+ struct rxe_dev *rxe = to_rdev(ibpd->device);438438+ struct rxe_pd *pd = to_rpd(ibpd);439439+ struct rxe_srq *srq;440440+ struct ib_ucontext *context = udata ? 
ibpd->uobject->context : NULL;441441+442442+ err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);443443+ if (err)444444+ goto err1;445445+446446+ srq = rxe_alloc(&rxe->srq_pool);447447+ if (!srq) {448448+ err = -ENOMEM;449449+ goto err1;450450+ }451451+452452+ rxe_add_index(srq);453453+ rxe_add_ref(pd);454454+ srq->pd = pd;455455+456456+ err = rxe_srq_from_init(rxe, srq, init, context, udata);457457+ if (err)458458+ goto err2;459459+460460+ return &srq->ibsrq;461461+462462+err2:463463+ rxe_drop_ref(pd);464464+ rxe_drop_index(srq);465465+ rxe_drop_ref(srq);466466+err1:467467+ return ERR_PTR(err);468468+}469469+470470+static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,471471+ enum ib_srq_attr_mask mask,472472+ struct ib_udata *udata)473473+{474474+ int err;475475+ struct rxe_srq *srq = to_rsrq(ibsrq);476476+ struct rxe_dev *rxe = to_rdev(ibsrq->device);477477+478478+ err = rxe_srq_chk_attr(rxe, srq, attr, mask);479479+ if (err)480480+ goto err1;481481+482482+ err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);483483+ if (err)484484+ goto err1;485485+486486+ return 0;487487+488488+err1:489489+ return err;490490+}491491+492492+static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)493493+{494494+ struct rxe_srq *srq = to_rsrq(ibsrq);495495+496496+ if (srq->error)497497+ return -EINVAL;498498+499499+ attr->max_wr = srq->rq.queue->buf->index_mask;500500+ attr->max_sge = srq->rq.max_sge;501501+ attr->srq_limit = srq->limit;502502+ return 0;503503+}504504+505505+static int rxe_destroy_srq(struct ib_srq *ibsrq)506506+{507507+ struct rxe_srq *srq = to_rsrq(ibsrq);508508+509509+ if (srq->rq.queue)510510+ rxe_queue_cleanup(srq->rq.queue);511511+512512+ rxe_drop_ref(srq->pd);513513+ rxe_drop_index(srq);514514+ rxe_drop_ref(srq);515515+516516+ return 0;517517+}518518+519519+static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,520520+ struct ib_recv_wr **bad_wr)521521+{522522+ int err = 0;523523+ unsigned 
long flags;524524+ struct rxe_srq *srq = to_rsrq(ibsrq);525525+526526+ spin_lock_irqsave(&srq->rq.producer_lock, flags);527527+528528+ while (wr) {529529+ err = post_one_recv(&srq->rq, wr);530530+ if (unlikely(err))531531+ break;532532+ wr = wr->next;533533+ }534534+535535+ spin_unlock_irqrestore(&srq->rq.producer_lock, flags);536536+537537+ if (err)538538+ *bad_wr = wr;539539+540540+ return err;541541+}542542+543543+static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,544544+ struct ib_qp_init_attr *init,545545+ struct ib_udata *udata)546546+{547547+ int err;548548+ struct rxe_dev *rxe = to_rdev(ibpd->device);549549+ struct rxe_pd *pd = to_rpd(ibpd);550550+ struct rxe_qp *qp;551551+552552+ err = rxe_qp_chk_init(rxe, init);553553+ if (err)554554+ goto err1;555555+556556+ qp = rxe_alloc(&rxe->qp_pool);557557+ if (!qp) {558558+ err = -ENOMEM;559559+ goto err1;560560+ }561561+562562+ if (udata) {563563+ if (udata->inlen) {564564+ err = -EINVAL;565565+ goto err1;566566+ }567567+ qp->is_user = 1;568568+ }569569+570570+ rxe_add_index(qp);571571+572572+ err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);573573+ if (err)574574+ goto err2;575575+576576+ return &qp->ibqp;577577+578578+err2:579579+ rxe_drop_index(qp);580580+ rxe_drop_ref(qp);581581+err1:582582+ return ERR_PTR(err);583583+}584584+585585+static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,586586+ int mask, struct ib_udata *udata)587587+{588588+ int err;589589+ struct rxe_dev *rxe = to_rdev(ibqp->device);590590+ struct rxe_qp *qp = to_rqp(ibqp);591591+592592+ err = rxe_qp_chk_attr(rxe, qp, attr, mask);593593+ if (err)594594+ goto err1;595595+596596+ err = rxe_qp_from_attr(qp, attr, mask, udata);597597+ if (err)598598+ goto err1;599599+600600+ return 0;601601+602602+err1:603603+ return err;604604+}605605+606606+static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,607607+ int mask, struct ib_qp_init_attr *init)608608+{609609+ struct rxe_qp *qp = to_rqp(ibqp);610610+611611+ 
rxe_qp_to_init(qp, init);612612+ rxe_qp_to_attr(qp, attr, mask);613613+614614+ return 0;615615+}616616+617617+static int rxe_destroy_qp(struct ib_qp *ibqp)618618+{619619+ struct rxe_qp *qp = to_rqp(ibqp);620620+621621+ rxe_qp_destroy(qp);622622+ rxe_drop_index(qp);623623+ rxe_drop_ref(qp);624624+ return 0;625625+}626626+627627+static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,628628+ unsigned int mask, unsigned int length)629629+{630630+ int num_sge = ibwr->num_sge;631631+ struct rxe_sq *sq = &qp->sq;632632+633633+ if (unlikely(num_sge > sq->max_sge))634634+ goto err1;635635+636636+ if (unlikely(mask & WR_ATOMIC_MASK)) {637637+ if (length < 8)638638+ goto err1;639639+640640+ if (atomic_wr(ibwr)->remote_addr & 0x7)641641+ goto err1;642642+ }643643+644644+ if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&645645+ (length > sq->max_inline)))646646+ goto err1;647647+648648+ return 0;649649+650650+err1:651651+ return -EINVAL;652652+}653653+654654+static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,655655+ struct ib_send_wr *ibwr)656656+{657657+ wr->wr_id = ibwr->wr_id;658658+ wr->num_sge = ibwr->num_sge;659659+ wr->opcode = ibwr->opcode;660660+ wr->send_flags = ibwr->send_flags;661661+662662+ if (qp_type(qp) == IB_QPT_UD ||663663+ qp_type(qp) == IB_QPT_SMI ||664664+ qp_type(qp) == IB_QPT_GSI) {665665+ wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;666666+ wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;667667+ if (qp_type(qp) == IB_QPT_GSI)668668+ wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;669669+ if (wr->opcode == IB_WR_SEND_WITH_IMM)670670+ wr->ex.imm_data = ibwr->ex.imm_data;671671+ } else {672672+ switch (wr->opcode) {673673+ case IB_WR_RDMA_WRITE_WITH_IMM:674674+ wr->ex.imm_data = ibwr->ex.imm_data;675675+ case IB_WR_RDMA_READ:676676+ case IB_WR_RDMA_WRITE:677677+ wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;678678+ wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;679679+ break;680680+ case IB_WR_SEND_WITH_IMM:681681+ 
wr->ex.imm_data = ibwr->ex.imm_data;682682+ break;683683+ case IB_WR_SEND_WITH_INV:684684+ wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;685685+ break;686686+ case IB_WR_ATOMIC_CMP_AND_SWP:687687+ case IB_WR_ATOMIC_FETCH_AND_ADD:688688+ wr->wr.atomic.remote_addr =689689+ atomic_wr(ibwr)->remote_addr;690690+ wr->wr.atomic.compare_add =691691+ atomic_wr(ibwr)->compare_add;692692+ wr->wr.atomic.swap = atomic_wr(ibwr)->swap;693693+ wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;694694+ break;695695+ case IB_WR_LOCAL_INV:696696+ wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;697697+ break;698698+ case IB_WR_REG_MR:699699+ wr->wr.reg.mr = reg_wr(ibwr)->mr;700700+ wr->wr.reg.key = reg_wr(ibwr)->key;701701+ wr->wr.reg.access = reg_wr(ibwr)->access;702702+ break;703703+ default:704704+ break;705705+ }706706+ }707707+}708708+709709+static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,710710+ unsigned int mask, unsigned int length,711711+ struct rxe_send_wqe *wqe)712712+{713713+ int num_sge = ibwr->num_sge;714714+ struct ib_sge *sge;715715+ int i;716716+ u8 *p;717717+718718+ init_send_wr(qp, &wqe->wr, ibwr);719719+720720+ if (qp_type(qp) == IB_QPT_UD ||721721+ qp_type(qp) == IB_QPT_SMI ||722722+ qp_type(qp) == IB_QPT_GSI)723723+ memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));724724+725725+ if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {726726+ p = wqe->dma.inline_data;727727+728728+ sge = ibwr->sg_list;729729+ for (i = 0; i < num_sge; i++, sge++) {730730+ if (qp->is_user && copy_from_user(p, (__user void *)731731+ (uintptr_t)sge->addr, sge->length))732732+ return -EFAULT;733733+734734+ else if (!qp->is_user)735735+ memcpy(p, (void *)(uintptr_t)sge->addr,736736+ sge->length);737737+738738+ p += sge->length;739739+ }740740+ } else if (mask & WR_REG_MASK) {741741+ wqe->mask = mask;742742+ wqe->state = wqe_state_posted;743743+ return 0;744744+ } else745745+ memcpy(wqe->dma.sge, ibwr->sg_list,746746+ num_sge * sizeof(struct 
ib_sge));747747+748748+ wqe->iova = (mask & WR_ATOMIC_MASK) ?749749+ atomic_wr(ibwr)->remote_addr :750750+ rdma_wr(ibwr)->remote_addr;751751+ wqe->mask = mask;752752+ wqe->dma.length = length;753753+ wqe->dma.resid = length;754754+ wqe->dma.num_sge = num_sge;755755+ wqe->dma.cur_sge = 0;756756+ wqe->dma.sge_offset = 0;757757+ wqe->state = wqe_state_posted;758758+ wqe->ssn = atomic_add_return(1, &qp->ssn);759759+760760+ return 0;761761+}762762+763763+static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,764764+ unsigned mask, u32 length)765765+{766766+ int err;767767+ struct rxe_sq *sq = &qp->sq;768768+ struct rxe_send_wqe *send_wqe;769769+ unsigned long flags;770770+771771+ err = validate_send_wr(qp, ibwr, mask, length);772772+ if (err)773773+ return err;774774+775775+ spin_lock_irqsave(&qp->sq.sq_lock, flags);776776+777777+ if (unlikely(queue_full(sq->queue))) {778778+ err = -ENOMEM;779779+ goto err1;780780+ }781781+782782+ send_wqe = producer_addr(sq->queue);783783+784784+ err = init_send_wqe(qp, ibwr, mask, length, send_wqe);785785+ if (unlikely(err))786786+ goto err1;787787+788788+ /*789789+ * make sure all changes to the work queue are790790+ * written before we update the producer pointer791791+ */792792+ smp_wmb();793793+794794+ advance_producer(sq->queue);795795+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);796796+797797+ return 0;798798+799799+err1:800800+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);801801+ return err;802802+}803803+804804+static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,805805+ struct ib_send_wr **bad_wr)806806+{807807+ int err = 0;808808+ struct rxe_qp *qp = to_rqp(ibqp);809809+ unsigned int mask;810810+ unsigned int length = 0;811811+ int i;812812+ int must_sched;813813+814814+ if (unlikely(!qp->valid)) {815815+ *bad_wr = wr;816816+ return -EINVAL;817817+ }818818+819819+ if (unlikely(qp->req.state < QP_STATE_READY)) {820820+ *bad_wr = wr;821821+ return -EINVAL;822822+ }823823+824824+ while (wr) 
{825825+ mask = wr_opcode_mask(wr->opcode, qp);826826+ if (unlikely(!mask)) {827827+ err = -EINVAL;828828+ *bad_wr = wr;829829+ break;830830+ }831831+832832+ if (unlikely((wr->send_flags & IB_SEND_INLINE) &&833833+ !(mask & WR_INLINE_MASK))) {834834+ err = -EINVAL;835835+ *bad_wr = wr;836836+ break;837837+ }838838+839839+ length = 0;840840+ for (i = 0; i < wr->num_sge; i++)841841+ length += wr->sg_list[i].length;842842+843843+ err = post_one_send(qp, wr, mask, length);844844+845845+ if (err) {846846+ *bad_wr = wr;847847+ break;848848+ }849849+ wr = wr->next;850850+ }851851+852852+ /*853853+ * Must sched in case of GSI QP because ib_send_mad() hold irq lock,854854+ * and the requester call ip_local_out_sk() that takes spin_lock_bh.855855+ */856856+ must_sched = (qp_type(qp) == IB_QPT_GSI) ||857857+ (queue_count(qp->sq.queue) > 1);858858+859859+ rxe_run_task(&qp->req.task, must_sched);860860+861861+ return err;862862+}863863+864864+static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,865865+ struct ib_recv_wr **bad_wr)866866+{867867+ int err = 0;868868+ struct rxe_qp *qp = to_rqp(ibqp);869869+ struct rxe_rq *rq = &qp->rq;870870+ unsigned long flags;871871+872872+ if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {873873+ *bad_wr = wr;874874+ err = -EINVAL;875875+ goto err1;876876+ }877877+878878+ if (unlikely(qp->srq)) {879879+ *bad_wr = wr;880880+ err = -EINVAL;881881+ goto err1;882882+ }883883+884884+ spin_lock_irqsave(&rq->producer_lock, flags);885885+886886+ while (wr) {887887+ err = post_one_recv(rq, wr);888888+ if (unlikely(err)) {889889+ *bad_wr = wr;890890+ break;891891+ }892892+ wr = wr->next;893893+ }894894+895895+ spin_unlock_irqrestore(&rq->producer_lock, flags);896896+897897+err1:898898+ return err;899899+}900900+901901+static struct ib_cq *rxe_create_cq(struct ib_device *dev,902902+ const struct ib_cq_init_attr *attr,903903+ struct ib_ucontext *context,904904+ struct ib_udata *udata)905905+{906906+ int err;907907+ struct rxe_dev 
*rxe = to_rdev(dev);
	struct rxe_cq *cq;

	/* no CQ creation flags are supported */
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, udata);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

/* Release the CQ's pool reference; the pool frees it on last put. */
static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_drop_ref(cq);
	return 0;
}

/* Resize the CQ's completion queue to hold @cqe entries. */
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

/* Drain up to @num_entries completions into @wc.
 * Returns the number of completions copied out.
 */
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	/* cq_lock serializes against the completer posting new CQEs */
	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

/* Report how many completions are available, capped at @wc_cnt. */
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ?
wc_cnt : count;991991+}992992+993993+static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)994994+{995995+ struct rxe_cq *cq = to_rcq(ibcq);996996+997997+ if (cq->notify != IB_CQ_NEXT_COMP)998998+ cq->notify = flags & IB_CQ_SOLICITED_MASK;999999+10001000+ return 0;10011001+}10021002+10031003+static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)10041004+{10051005+ struct rxe_dev *rxe = to_rdev(ibpd->device);10061006+ struct rxe_pd *pd = to_rpd(ibpd);10071007+ struct rxe_mem *mr;10081008+ int err;10091009+10101010+ mr = rxe_alloc(&rxe->mr_pool);10111011+ if (!mr) {10121012+ err = -ENOMEM;10131013+ goto err1;10141014+ }10151015+10161016+ rxe_add_index(mr);10171017+10181018+ rxe_add_ref(pd);10191019+10201020+ err = rxe_mem_init_dma(rxe, pd, access, mr);10211021+ if (err)10221022+ goto err2;10231023+10241024+ return &mr->ibmr;10251025+10261026+err2:10271027+ rxe_drop_ref(pd);10281028+ rxe_drop_index(mr);10291029+ rxe_drop_ref(mr);10301030+err1:10311031+ return ERR_PTR(err);10321032+}10331033+10341034+static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,10351035+ u64 start,10361036+ u64 length,10371037+ u64 iova,10381038+ int access, struct ib_udata *udata)10391039+{10401040+ int err;10411041+ struct rxe_dev *rxe = to_rdev(ibpd->device);10421042+ struct rxe_pd *pd = to_rpd(ibpd);10431043+ struct rxe_mem *mr;10441044+10451045+ mr = rxe_alloc(&rxe->mr_pool);10461046+ if (!mr) {10471047+ err = -ENOMEM;10481048+ goto err2;10491049+ }10501050+10511051+ rxe_add_index(mr);10521052+10531053+ rxe_add_ref(pd);10541054+10551055+ err = rxe_mem_init_user(rxe, pd, start, length, iova,10561056+ access, udata, mr);10571057+ if (err)10581058+ goto err3;10591059+10601060+ return &mr->ibmr;10611061+10621062+err3:10631063+ rxe_drop_ref(pd);10641064+ rxe_drop_index(mr);10651065+ rxe_drop_ref(mr);10661066+err2:10671067+ return ERR_PTR(err);10681068+}10691069+10701070+static int rxe_dereg_mr(struct ib_mr *ibmr)10711071+{10721072+ struct rxe_mem *mr = 
to_rmr(ibmr);10731073+10741074+ mr->state = RXE_MEM_STATE_ZOMBIE;10751075+ rxe_drop_ref(mr->pd);10761076+ rxe_drop_index(mr);10771077+ rxe_drop_ref(mr);10781078+ return 0;10791079+}10801080+10811081+static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,10821082+ enum ib_mr_type mr_type,10831083+ u32 max_num_sg)10841084+{10851085+ struct rxe_dev *rxe = to_rdev(ibpd->device);10861086+ struct rxe_pd *pd = to_rpd(ibpd);10871087+ struct rxe_mem *mr;10881088+ int err;10891089+10901090+ if (mr_type != IB_MR_TYPE_MEM_REG)10911091+ return ERR_PTR(-EINVAL);10921092+10931093+ mr = rxe_alloc(&rxe->mr_pool);10941094+ if (!mr) {10951095+ err = -ENOMEM;10961096+ goto err1;10971097+ }10981098+10991099+ rxe_add_index(mr);11001100+11011101+ rxe_add_ref(pd);11021102+11031103+ err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);11041104+ if (err)11051105+ goto err2;11061106+11071107+ return &mr->ibmr;11081108+11091109+err2:11101110+ rxe_drop_ref(pd);11111111+ rxe_drop_index(mr);11121112+ rxe_drop_ref(mr);11131113+err1:11141114+ return ERR_PTR(err);11151115+}11161116+11171117+static int rxe_set_page(struct ib_mr *ibmr, u64 addr)11181118+{11191119+ struct rxe_mem *mr = to_rmr(ibmr);11201120+ struct rxe_map *map;11211121+ struct rxe_phys_buf *buf;11221122+11231123+ if (unlikely(mr->nbuf == mr->num_buf))11241124+ return -ENOMEM;11251125+11261126+ map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];11271127+ buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];11281128+11291129+ buf->addr = addr;11301130+ buf->size = ibmr->page_size;11311131+ mr->nbuf++;11321132+11331133+ return 0;11341134+}11351135+11361136+static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,11371137+ unsigned int *sg_offset)11381138+{11391139+ struct rxe_mem *mr = to_rmr(ibmr);11401140+ int n;11411141+11421142+ mr->nbuf = 0;11431143+11441144+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);11451145+11461146+ mr->va = ibmr->iova;11471147+ mr->iova = ibmr->iova;11481148+ mr->length = 
ibmr->length;11491149+ mr->page_shift = ilog2(ibmr->page_size);11501150+ mr->page_mask = ibmr->page_size - 1;11511151+ mr->offset = mr->iova & mr->page_mask;11521152+11531153+ return n;11541154+}11551155+11561156+static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)11571157+{11581158+ int err;11591159+ struct rxe_dev *rxe = to_rdev(ibqp->device);11601160+ struct rxe_qp *qp = to_rqp(ibqp);11611161+ struct rxe_mc_grp *grp;11621162+11631163+ /* takes a ref on grp if successful */11641164+ err = rxe_mcast_get_grp(rxe, mgid, &grp);11651165+ if (err)11661166+ return err;11671167+11681168+ err = rxe_mcast_add_grp_elem(rxe, qp, grp);11691169+11701170+ rxe_drop_ref(grp);11711171+ return err;11721172+}11731173+11741174+static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)11751175+{11761176+ struct rxe_dev *rxe = to_rdev(ibqp->device);11771177+ struct rxe_qp *qp = to_rqp(ibqp);11781178+11791179+ return rxe_mcast_drop_grp_elem(rxe, qp, mgid);11801180+}11811181+11821182+static ssize_t rxe_show_parent(struct device *device,11831183+ struct device_attribute *attr, char *buf)11841184+{11851185+ struct rxe_dev *rxe = container_of(device, struct rxe_dev,11861186+ ib_dev.dev);11871187+ char *name;11881188+11891189+ name = rxe->ifc_ops->parent_name(rxe, 1);11901190+ return snprintf(buf, 16, "%s\n", name);11911191+}11921192+11931193+static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);11941194+11951195+static struct device_attribute *rxe_dev_attributes[] = {11961196+ &dev_attr_parent,11971197+};11981198+11991199+int rxe_register_device(struct rxe_dev *rxe)12001200+{12011201+ int err;12021202+ int i;12031203+ struct ib_device *dev = &rxe->ib_dev;12041204+12051205+ strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);12061206+ strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));12071207+12081208+ dev->owner = THIS_MODULE;12091209+ dev->node_type = RDMA_NODE_IB_CA;12101210+ dev->phys_port_cnt = 1;12111211+ dev->num_comp_vectors = 
RXE_NUM_COMP_VECTORS;12121212+ dev->dma_device = rxe->ifc_ops->dma_device(rxe);12131213+ dev->local_dma_lkey = 0;12141214+ dev->node_guid = rxe->ifc_ops->node_guid(rxe);12151215+ dev->dma_ops = &rxe_dma_mapping_ops;12161216+12171217+ dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;12181218+ dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)12191219+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)12201220+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)12211221+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)12221222+ | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)12231223+ | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)12241224+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)12251225+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)12261226+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)12271227+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)12281228+ | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)12291229+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)12301230+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)12311231+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)12321232+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)12331233+ | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)12341234+ | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)12351235+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)12361236+ | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)12371237+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)12381238+ | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)12391239+ | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)12401240+ | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)12411241+ | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)12421242+ | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)12431243+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)12441244+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)12451245+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)12461246+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)12471247+ | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)12481248+ | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)12491249+ ;12501250+12511251+ dev->query_device = rxe_query_device;12521252+ dev->modify_device = rxe_modify_device;12531253+ dev->query_port = rxe_query_port;12541254+ 
dev->modify_port = rxe_modify_port;12551255+ dev->get_link_layer = rxe_get_link_layer;12561256+ dev->query_gid = rxe_query_gid;12571257+ dev->get_netdev = rxe_get_netdev;12581258+ dev->add_gid = rxe_add_gid;12591259+ dev->del_gid = rxe_del_gid;12601260+ dev->query_pkey = rxe_query_pkey;12611261+ dev->alloc_ucontext = rxe_alloc_ucontext;12621262+ dev->dealloc_ucontext = rxe_dealloc_ucontext;12631263+ dev->mmap = rxe_mmap;12641264+ dev->get_port_immutable = rxe_port_immutable;12651265+ dev->alloc_pd = rxe_alloc_pd;12661266+ dev->dealloc_pd = rxe_dealloc_pd;12671267+ dev->create_ah = rxe_create_ah;12681268+ dev->modify_ah = rxe_modify_ah;12691269+ dev->query_ah = rxe_query_ah;12701270+ dev->destroy_ah = rxe_destroy_ah;12711271+ dev->create_srq = rxe_create_srq;12721272+ dev->modify_srq = rxe_modify_srq;12731273+ dev->query_srq = rxe_query_srq;12741274+ dev->destroy_srq = rxe_destroy_srq;12751275+ dev->post_srq_recv = rxe_post_srq_recv;12761276+ dev->create_qp = rxe_create_qp;12771277+ dev->modify_qp = rxe_modify_qp;12781278+ dev->query_qp = rxe_query_qp;12791279+ dev->destroy_qp = rxe_destroy_qp;12801280+ dev->post_send = rxe_post_send;12811281+ dev->post_recv = rxe_post_recv;12821282+ dev->create_cq = rxe_create_cq;12831283+ dev->destroy_cq = rxe_destroy_cq;12841284+ dev->resize_cq = rxe_resize_cq;12851285+ dev->poll_cq = rxe_poll_cq;12861286+ dev->peek_cq = rxe_peek_cq;12871287+ dev->req_notify_cq = rxe_req_notify_cq;12881288+ dev->get_dma_mr = rxe_get_dma_mr;12891289+ dev->reg_user_mr = rxe_reg_user_mr;12901290+ dev->dereg_mr = rxe_dereg_mr;12911291+ dev->alloc_mr = rxe_alloc_mr;12921292+ dev->map_mr_sg = rxe_map_mr_sg;12931293+ dev->attach_mcast = rxe_attach_mcast;12941294+ dev->detach_mcast = rxe_detach_mcast;12951295+12961296+ err = ib_register_device(dev, NULL);12971297+ if (err) {12981298+ pr_warn("rxe_register_device failed, err = %d\n", err);12991299+ goto err1;13001300+ }13011301+13021302+ for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {13031303+ err 
= device_create_file(&dev->dev, rxe_dev_attributes[i]);13041304+ if (err) {13051305+ pr_warn("device_create_file failed, i = %d, err = %d\n",13061306+ i, err);13071307+ goto err2;13081308+ }13091309+ }13101310+13111311+ return 0;13121312+13131313+err2:13141314+ ib_unregister_device(dev);13151315+err1:13161316+ return err;13171317+}13181318+13191319+int rxe_unregister_device(struct rxe_dev *rxe)13201320+{13211321+ int i;13221322+ struct ib_device *dev = &rxe->ib_dev;13231323+13241324+ for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)13251325+ device_remove_file(&dev->dev, rxe_dev_attributes[i]);13261326+13271327+ ib_unregister_device(dev);13281328+13291329+ return 0;13301330+}
+480
drivers/infiniband/sw/rxe/rxe_verbs.h
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.44+ *55+ * This software is available to you under a choice of one of two66+ * licenses. You may choose to be licensed under the terms of the GNU77+ * General Public License (GPL) Version 2, available from the file88+ * COPYING in the main directory of this source tree, or the99+ * OpenIB.org BSD license below:1010+ *1111+ * Redistribution and use in source and binary forms, with or1212+ * without modification, are permitted provided that the following1313+ * conditions are met:1414+ *1515+ * - Redistributions of source code must retain the above1616+ * copyright notice, this list of conditions and the following1717+ * disclaimer.1818+ *1919+ * - Redistributions in binary form must reproduce the above2020+ * copyright notice, this list of conditions and the following2121+ * disclaimer in the documentation and/or other materials2222+ * provided with the distribution.2323+ *2424+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2525+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2626+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2727+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2828+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2929+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN3030+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3131+ * SOFTWARE.3232+ */3333+3434+#ifndef RXE_VERBS_H3535+#define RXE_VERBS_H3636+3737+#include <linux/interrupt.h>3838+#include <rdma/rdma_user_rxe.h>3939+#include "rxe_pool.h"4040+#include "rxe_task.h"4141+4242+static inline int pkey_match(u16 key1, u16 key2)4343+{4444+ return (((key1 & 0x7fff) != 0) &&4545+ ((key1 & 0x7fff) == (key2 & 0x7fff)) &&4646+ ((key1 & 0x8000) || (key2 & 0x8000))) ? 
1 : 0;4747+}4848+4949+/* Return >0 if psn_a > psn_b5050+ * 0 if psn_a == psn_b5151+ * <0 if psn_a < psn_b5252+ */5353+static inline int psn_compare(u32 psn_a, u32 psn_b)5454+{5555+ s32 diff;5656+5757+ diff = (psn_a - psn_b) << 8;5858+ return diff;5959+}6060+6161+struct rxe_ucontext {6262+ struct rxe_pool_entry pelem;6363+ struct ib_ucontext ibuc;6464+};6565+6666+struct rxe_pd {6767+ struct rxe_pool_entry pelem;6868+ struct ib_pd ibpd;6969+};7070+7171+struct rxe_ah {7272+ struct rxe_pool_entry pelem;7373+ struct ib_ah ibah;7474+ struct rxe_pd *pd;7575+ struct rxe_av av;7676+};7777+7878+struct rxe_cqe {7979+ union {8080+ struct ib_wc ibwc;8181+ struct ib_uverbs_wc uibwc;8282+ };8383+};8484+8585+struct rxe_cq {8686+ struct rxe_pool_entry pelem;8787+ struct ib_cq ibcq;8888+ struct rxe_queue *queue;8989+ spinlock_t cq_lock;9090+ u8 notify;9191+ int is_user;9292+ struct tasklet_struct comp_task;9393+};9494+9595+enum wqe_state {9696+ wqe_state_posted,9797+ wqe_state_processing,9898+ wqe_state_pending,9999+ wqe_state_done,100100+ wqe_state_error,101101+};102102+103103+struct rxe_sq {104104+ int max_wr;105105+ int max_sge;106106+ int max_inline;107107+ spinlock_t sq_lock; /* guard queue */108108+ struct rxe_queue *queue;109109+};110110+111111+struct rxe_rq {112112+ int max_wr;113113+ int max_sge;114114+ spinlock_t producer_lock; /* guard queue producer */115115+ spinlock_t consumer_lock; /* guard queue consumer */116116+ struct rxe_queue *queue;117117+};118118+119119+struct rxe_srq {120120+ struct rxe_pool_entry pelem;121121+ struct ib_srq ibsrq;122122+ struct rxe_pd *pd;123123+ struct rxe_rq rq;124124+ u32 srq_num;125125+126126+ int limit;127127+ int error;128128+};129129+130130+enum rxe_qp_state {131131+ QP_STATE_RESET,132132+ QP_STATE_INIT,133133+ QP_STATE_READY,134134+ QP_STATE_DRAIN, /* req only */135135+ QP_STATE_DRAINED, /* req only */136136+ QP_STATE_ERROR137137+};138138+139139+extern char *rxe_qp_state_name[];140140+141141+struct rxe_req_info {142142+ enum 
rxe_qp_state state;143143+ int wqe_index;144144+ u32 psn;145145+ int opcode;146146+ atomic_t rd_atomic;147147+ int wait_fence;148148+ int need_rd_atomic;149149+ int wait_psn;150150+ int need_retry;151151+ int noack_pkts;152152+ struct rxe_task task;153153+};154154+155155+struct rxe_comp_info {156156+ u32 psn;157157+ int opcode;158158+ int timeout;159159+ int timeout_retry;160160+ u32 retry_cnt;161161+ u32 rnr_retry;162162+ struct rxe_task task;163163+};164164+165165+enum rdatm_res_state {166166+ rdatm_res_state_next,167167+ rdatm_res_state_new,168168+ rdatm_res_state_replay,169169+};170170+171171+struct resp_res {172172+ int type;173173+ u32 first_psn;174174+ u32 last_psn;175175+ u32 cur_psn;176176+ enum rdatm_res_state state;177177+178178+ union {179179+ struct {180180+ struct sk_buff *skb;181181+ } atomic;182182+ struct {183183+ struct rxe_mem *mr;184184+ u64 va_org;185185+ u32 rkey;186186+ u32 length;187187+ u64 va;188188+ u32 resid;189189+ } read;190190+ };191191+};192192+193193+struct rxe_resp_info {194194+ enum rxe_qp_state state;195195+ u32 msn;196196+ u32 psn;197197+ int opcode;198198+ int drop_msg;199199+ int goto_error;200200+ int sent_psn_nak;201201+ enum ib_wc_status status;202202+ u8 aeth_syndrome;203203+204204+ /* Receive only */205205+ struct rxe_recv_wqe *wqe;206206+207207+ /* RDMA read / atomic only */208208+ u64 va;209209+ struct rxe_mem *mr;210210+ u32 resid;211211+ u32 rkey;212212+ u64 atomic_orig;213213+214214+ /* SRQ only */215215+ struct {216216+ struct rxe_recv_wqe wqe;217217+ struct ib_sge sge[RXE_MAX_SGE];218218+ } srq_wqe;219219+220220+ /* Responder resources. 
It's a circular list where the oldest221221+ * resource is dropped first.222222+ */223223+ struct resp_res *resources;224224+ unsigned int res_head;225225+ unsigned int res_tail;226226+ struct resp_res *res;227227+ struct rxe_task task;228228+};229229+230230+struct rxe_qp {231231+ struct rxe_pool_entry pelem;232232+ struct ib_qp ibqp;233233+ struct ib_qp_attr attr;234234+ unsigned int valid;235235+ unsigned int mtu;236236+ int is_user;237237+238238+ struct rxe_pd *pd;239239+ struct rxe_srq *srq;240240+ struct rxe_cq *scq;241241+ struct rxe_cq *rcq;242242+243243+ enum ib_sig_type sq_sig_type;244244+245245+ struct rxe_sq sq;246246+ struct rxe_rq rq;247247+248248+ struct socket *sk;249249+250250+ struct rxe_av pri_av;251251+ struct rxe_av alt_av;252252+253253+ /* list of mcast groups qp has joined (for cleanup) */254254+ struct list_head grp_list;255255+ spinlock_t grp_lock; /* guard grp_list */256256+257257+ struct sk_buff_head req_pkts;258258+ struct sk_buff_head resp_pkts;259259+ struct sk_buff_head send_pkts;260260+261261+ struct rxe_req_info req;262262+ struct rxe_comp_info comp;263263+ struct rxe_resp_info resp;264264+265265+ atomic_t ssn;266266+ atomic_t skb_out;267267+ int need_req_skb;268268+269269+ /* Timer for retranmitting packet when ACKs have been lost. RC270270+ * only. The requester sets it when it is not already271271+ * started. The responder resets it whenever an ack is272272+ * received.273273+ */274274+ struct timer_list retrans_timer;275275+ u64 qp_timeout_jiffies;276276+277277+ /* Timer for handling RNR NAKS. 
*/278278+ struct timer_list rnr_nak_timer;279279+280280+ spinlock_t state_lock; /* guard requester and completer */281281+};282282+283283+enum rxe_mem_state {284284+ RXE_MEM_STATE_ZOMBIE,285285+ RXE_MEM_STATE_INVALID,286286+ RXE_MEM_STATE_FREE,287287+ RXE_MEM_STATE_VALID,288288+};289289+290290+enum rxe_mem_type {291291+ RXE_MEM_TYPE_NONE,292292+ RXE_MEM_TYPE_DMA,293293+ RXE_MEM_TYPE_MR,294294+ RXE_MEM_TYPE_FMR,295295+ RXE_MEM_TYPE_MW,296296+};297297+298298+#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))299299+300300+struct rxe_phys_buf {301301+ u64 addr;302302+ u64 size;303303+};304304+305305+struct rxe_map {306306+ struct rxe_phys_buf buf[RXE_BUF_PER_MAP];307307+};308308+309309+struct rxe_mem {310310+ struct rxe_pool_entry pelem;311311+ union {312312+ struct ib_mr ibmr;313313+ struct ib_mw ibmw;314314+ };315315+316316+ struct rxe_pd *pd;317317+ struct ib_umem *umem;318318+319319+ u32 lkey;320320+ u32 rkey;321321+322322+ enum rxe_mem_state state;323323+ enum rxe_mem_type type;324324+ u64 va;325325+ u64 iova;326326+ size_t length;327327+ u32 offset;328328+ int access;329329+330330+ int page_shift;331331+ int page_mask;332332+ int map_shift;333333+ int map_mask;334334+335335+ u32 num_buf;336336+ u32 nbuf;337337+338338+ u32 max_buf;339339+ u32 num_map;340340+341341+ struct rxe_map **map;342342+};343343+344344+struct rxe_mc_grp {345345+ struct rxe_pool_entry pelem;346346+ spinlock_t mcg_lock; /* guard group */347347+ struct rxe_dev *rxe;348348+ struct list_head qp_list;349349+ union ib_gid mgid;350350+ int num_qp;351351+ u32 qkey;352352+ u16 pkey;353353+};354354+355355+struct rxe_mc_elem {356356+ struct rxe_pool_entry pelem;357357+ struct list_head qp_list;358358+ struct list_head grp_list;359359+ struct rxe_qp *qp;360360+ struct rxe_mc_grp *grp;361361+};362362+363363+struct rxe_port {364364+ struct ib_port_attr attr;365365+ u16 *pkey_tbl;366366+ __be64 port_guid;367367+ __be64 subnet_prefix;368368+ spinlock_t port_lock; /* guard port */369369+ 
unsigned int mtu_cap;370370+ /* special QPs */371371+ u32 qp_smi_index;372372+ u32 qp_gsi_index;373373+};374374+375375+/* callbacks from rdma_rxe to network interface layer */376376+struct rxe_ifc_ops {377377+ void (*release)(struct rxe_dev *rxe);378378+ __be64 (*node_guid)(struct rxe_dev *rxe);379379+ __be64 (*port_guid)(struct rxe_dev *rxe);380380+ struct device *(*dma_device)(struct rxe_dev *rxe);381381+ int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);382382+ int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);383383+ int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,384384+ struct sk_buff *skb, u32 *crc);385385+ int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,386386+ struct sk_buff *skb);387387+ int (*loopback)(struct sk_buff *skb);388388+ struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,389389+ int paylen, struct rxe_pkt_info *pkt);390390+ char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);391391+ enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,392392+ unsigned int port_num);393393+};394394+395395+struct rxe_dev {396396+ struct ib_device ib_dev;397397+ struct ib_device_attr attr;398398+ int max_ucontext;399399+ int max_inline_data;400400+ struct kref ref_cnt;401401+ struct mutex usdev_lock;402402+403403+ struct rxe_ifc_ops *ifc_ops;404404+405405+ struct net_device *ndev;406406+407407+ int xmit_errors;408408+409409+ struct rxe_pool uc_pool;410410+ struct rxe_pool pd_pool;411411+ struct rxe_pool ah_pool;412412+ struct rxe_pool srq_pool;413413+ struct rxe_pool qp_pool;414414+ struct rxe_pool cq_pool;415415+ struct rxe_pool mr_pool;416416+ struct rxe_pool mw_pool;417417+ struct rxe_pool mc_grp_pool;418418+ struct rxe_pool mc_elem_pool;419419+420420+ spinlock_t pending_lock; /* guard pending_mmaps */421421+ struct list_head pending_mmaps;422422+423423+ spinlock_t mmap_offset_lock; /* guard mmap_offset */424424+ int mmap_offset;425425+426426+ struct rxe_port port;427427+ struct 
list_head list;428428+};429429+430430+static inline struct rxe_dev *to_rdev(struct ib_device *dev)431431+{432432+ return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;433433+}434434+435435+static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)436436+{437437+ return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;438438+}439439+440440+static inline struct rxe_pd *to_rpd(struct ib_pd *pd)441441+{442442+ return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;443443+}444444+445445+static inline struct rxe_ah *to_rah(struct ib_ah *ah)446446+{447447+ return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;448448+}449449+450450+static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)451451+{452452+ return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;453453+}454454+455455+static inline struct rxe_qp *to_rqp(struct ib_qp *qp)456456+{457457+ return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;458458+}459459+460460+static inline struct rxe_cq *to_rcq(struct ib_cq *cq)461461+{462462+ return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;463463+}464464+465465+static inline struct rxe_mem *to_rmr(struct ib_mr *mr)466466+{467467+ return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;468468+}469469+470470+static inline struct rxe_mem *to_rmw(struct ib_mw *mw)471471+{472472+ return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;473473+}474474+475475+int rxe_register_device(struct rxe_dev *rxe);476476+int rxe_unregister_device(struct rxe_dev *rxe);477477+478478+void rxe_mc_cleanup(void *arg);479479+480480+#endif /* RXE_VERBS_H */
	IB_SA_BEST = 3
};

/*
 * There are 4 types of join states:
 * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
 * The order corresponds to JoinState bits in MCMemberRecord.
 */
enum ib_sa_mc_join_states {
	FULLMEMBER_JOIN,
	NONMEMBER_JOIN,
	/* NOTE(review): "NONMEBER" typo is part of the public header;
	 * renaming the constant would break external users.
	 */
	SENDONLY_NONMEBER_JOIN,
	SENDONLY_FULLMEMBER_JOIN,
	NUM_JOIN_MEMBERSHIP_TYPES,
};

#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT	BIT(12)

/*
+3-1
include/rdma/rdma_cm.h
···333333 * address.334334 * @id: Communication identifier associated with the request.335335 * @addr: Multicast address identifying the group to join.336336+ * @join_state: Multicast JoinState bitmap requested by port.337337+ * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits.336338 * @context: User-defined context associated with the join request, returned337339 * to the user through the private_data pointer in multicast events.338340 */339341int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,340340- void *context);342342+ u8 join_state, void *context);341343342344/**343345 * rdma_leave_multicast - Leave the multicast group specified by the given
···11+/*22+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.33+ *44+ * This software is available to you under a choice of one of two55+ * licenses. You may choose to be licensed under the terms of the GNU66+ * General Public License (GPL) Version 2, available from the file77+ * COPYING in the main directory of this source tree, or the88+ * OpenIB.org BSD license below:99+ *1010+ * Redistribution and use in source and binary forms, with or1111+ * without modification, are permitted provided that the following1212+ * conditions are met:1313+ *1414+ * - Redistributions of source code must retain the above1515+ * copyright notice, this list of conditions and the following1616+ * disclaimer.1717+ *1818+ * - Redistributions in binary form must reproduce the above1919+ * copyright notice, this list of conditions and the following2020+ * disclaimer in the documentation and/or other materials2121+ * provided with the distribution.2222+ *2323+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,2424+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF2525+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND2626+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS2727+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN2828+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN2929+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE3030+ * SOFTWARE.3131+ */3232+3333+#ifndef RDMA_USER_RXE_H3434+#define RDMA_USER_RXE_H3535+3636+#include <linux/types.h>3737+3838+union rxe_gid {3939+ __u8 raw[16];4040+ struct {4141+ __be64 subnet_prefix;4242+ __be64 interface_id;4343+ } global;4444+};4545+4646+struct rxe_global_route {4747+ union rxe_gid dgid;4848+ __u32 flow_label;4949+ __u8 sgid_index;5050+ __u8 hop_limit;5151+ __u8 traffic_class;5252+};5353+5454+struct rxe_av {5555+ __u8 port_num;5656+ __u8 network_type;5757+ struct rxe_global_route grh;5858+ union {5959+ struct sockaddr _sockaddr;6060+ struct sockaddr_in _sockaddr_in;6161+ struct sockaddr_in6 _sockaddr_in6;6262+ } sgid_addr, dgid_addr;6363+};6464+6565+struct rxe_send_wr {6666+ __u64 wr_id;6767+ __u32 num_sge;6868+ __u32 opcode;6969+ __u32 send_flags;7070+ union {7171+ __be32 imm_data;7272+ __u32 invalidate_rkey;7373+ } ex;7474+ union {7575+ struct {7676+ __u64 remote_addr;7777+ __u32 rkey;7878+ } rdma;7979+ struct {8080+ __u64 remote_addr;8181+ __u64 compare_add;8282+ __u64 swap;8383+ __u32 rkey;8484+ } atomic;8585+ struct {8686+ __u32 remote_qpn;8787+ __u32 remote_qkey;8888+ __u16 pkey_index;8989+ } ud;9090+ struct {9191+ struct ib_mr *mr;9292+ __u32 key;9393+ int access;9494+ } reg;9595+ } wr;9696+};9797+9898+struct rxe_sge {9999+ __u64 addr;100100+ __u32 length;101101+ __u32 lkey;102102+};103103+104104+struct mminfo {105105+ __u64 offset;106106+ __u32 size;107107+ __u32 pad;108108+};109109+110110+struct rxe_dma_info {111111+ __u32 length;112112+ __u32 resid;113113+ __u32 cur_sge;114114+ __u32 num_sge;115115+ __u32 sge_offset;116116+ union {117117+ __u8 inline_data[0];118118+ struct rxe_sge sge[0];119119+ };120120+};121121+122122+struct rxe_send_wqe {123123+ struct rxe_send_wr 
wr;124124+ struct rxe_av av;125125+ __u32 status;126126+ __u32 state;127127+ __u64 iova;128128+ __u32 mask;129129+ __u32 first_psn;130130+ __u32 last_psn;131131+ __u32 ack_length;132132+ __u32 ssn;133133+ __u32 has_rd_atomic;134134+ struct rxe_dma_info dma;135135+};136136+137137+struct rxe_recv_wqe {138138+ __u64 wr_id;139139+ __u32 num_sge;140140+ __u32 padding;141141+ struct rxe_dma_info dma;142142+};143143+144144+#endif /* RDMA_USER_RXE_H */