Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'misc' and 'rxe' into k.o/for-4.8-1

+13520 -74
+9
MAINTAINERS
··· 7444 7444 Q: http://patchwork.ozlabs.org/project/netdev/list/ 7445 7445 F: drivers/net/ethernet/mellanox/mlxsw/ 7446 7446 7447 + SOFT-ROCE DRIVER (rxe) 7448 + M: Moni Shoua <monis@mellanox.com> 7449 + L: linux-rdma@vger.kernel.org 7450 + S: Supported 7451 + W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home 7452 + Q: http://patchwork.kernel.org/project/linux-rdma/list/ 7453 + F: drivers/infiniband/hw/rxe/ 7454 + F: include/uapi/rdma/rdma_user_rxe.h 7455 + 7447 7456 MEMBARRIER SUPPORT 7448 7457 M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> 7449 7458 M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+1
drivers/infiniband/Kconfig
··· 84 84 source "drivers/infiniband/ulp/isert/Kconfig" 85 85 86 86 source "drivers/infiniband/sw/rdmavt/Kconfig" 87 + source "drivers/infiniband/sw/rxe/Kconfig" 87 88 88 89 source "drivers/infiniband/hw/hfi1/Kconfig" 89 90
+93 -7
drivers/infiniband/core/cma.c
··· 68 68 MODULE_LICENSE("Dual BSD/GPL"); 69 69 70 70 #define CMA_CM_RESPONSE_TIMEOUT 20 71 + #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 71 72 #define CMA_MAX_CM_RETRIES 15 72 73 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 73 74 #define CMA_IBOE_PACKET_LIFETIME 18 ··· 161 160 enum rdma_port_space ps; 162 161 struct hlist_head owners; 163 162 unsigned short port; 163 + }; 164 + 165 + struct class_port_info_context { 166 + struct ib_class_port_info *class_port_info; 167 + struct ib_device *device; 168 + struct completion done; 169 + struct ib_sa_query *sa_query; 170 + u8 port_num; 164 171 }; 165 172 166 173 static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, ··· 315 306 struct sockaddr_storage addr; 316 307 struct kref mcref; 317 308 bool igmp_joined; 309 + u8 join_state; 318 310 }; 319 311 320 312 struct cma_work { ··· 3764 3754 } 3765 3755 } 3766 3756 3757 + static void cma_query_sa_classport_info_cb(int status, 3758 + struct ib_class_port_info *rec, 3759 + void *context) 3760 + { 3761 + struct class_port_info_context *cb_ctx = context; 3762 + 3763 + WARN_ON(!context); 3764 + 3765 + if (status || !rec) { 3766 + pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", 3767 + cb_ctx->device->name, cb_ctx->port_num, status); 3768 + goto out; 3769 + } 3770 + 3771 + memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); 3772 + 3773 + out: 3774 + complete(&cb_ctx->done); 3775 + } 3776 + 3777 + static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, 3778 + struct ib_class_port_info *class_port_info) 3779 + { 3780 + struct class_port_info_context *cb_ctx; 3781 + int ret; 3782 + 3783 + cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); 3784 + if (!cb_ctx) 3785 + return -ENOMEM; 3786 + 3787 + cb_ctx->device = device; 3788 + cb_ctx->class_port_info = class_port_info; 3789 + cb_ctx->port_num = port_num; 3790 + init_completion(&cb_ctx->done); 3791 + 3792 + ret = 
ib_sa_classport_info_rec_query(&sa_client, device, port_num, 3793 + CMA_QUERY_CLASSPORT_INFO_TIMEOUT, 3794 + GFP_KERNEL, cma_query_sa_classport_info_cb, 3795 + cb_ctx, &cb_ctx->sa_query); 3796 + if (ret < 0) { 3797 + pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", 3798 + device->name, port_num, ret); 3799 + goto out; 3800 + } 3801 + 3802 + wait_for_completion(&cb_ctx->done); 3803 + 3804 + out: 3805 + kfree(cb_ctx); 3806 + return ret; 3807 + } 3808 + 3767 3809 static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 3768 3810 struct cma_multicast *mc) 3769 3811 { 3770 3812 struct ib_sa_mcmember_rec rec; 3813 + struct ib_class_port_info class_port_info; 3771 3814 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3772 3815 ib_sa_comp_mask comp_mask; 3773 3816 int ret; ··· 3839 3776 rec.qkey = cpu_to_be32(id_priv->qkey); 3840 3777 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3841 3778 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3842 - rec.join_state = 1; 3779 + rec.join_state = mc->join_state; 3780 + 3781 + if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { 3782 + ret = cma_query_sa_classport_info(id_priv->id.device, 3783 + id_priv->id.port_num, 3784 + &class_port_info); 3785 + 3786 + if (ret) 3787 + return ret; 3788 + 3789 + if (!(ib_get_cpi_capmask2(&class_port_info) & 3790 + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { 3791 + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 3792 + "RDMA CM: SM doesn't support Send Only Full Member option\n", 3793 + id_priv->id.device->name, id_priv->id.port_num); 3794 + return -EOPNOTSUPP; 3795 + } 3796 + } 3843 3797 3844 3798 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 3845 3799 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | ··· 3925 3845 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 3926 3846 struct net_device *ndev = NULL; 3927 3847 enum ib_gid_type gid_type; 3848 + bool send_only; 3849 + 3850 + send_only = 
mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); 3928 3851 3929 3852 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 3930 3853 return -EINVAL; ··· 3961 3878 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3962 3879 rdma_start_port(id_priv->cma_dev->device)]; 3963 3880 if (addr->sa_family == AF_INET) { 3964 - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 3965 - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, 3966 - true); 3967 - if (!err) { 3968 - mc->igmp_joined = true; 3881 + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 3969 3882 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; 3883 + if (!send_only) { 3884 + err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, 3885 + true); 3886 + if (!err) 3887 + mc->igmp_joined = true; 3888 + } 3970 3889 } 3971 3890 } else { 3972 3891 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ··· 3998 3913 } 3999 3914 4000 3915 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 4001 - void *context) 3916 + u8 join_state, void *context) 4002 3917 { 4003 3918 struct rdma_id_private *id_priv; 4004 3919 struct cma_multicast *mc; ··· 4017 3932 mc->context = context; 4018 3933 mc->id_priv = id_priv; 4019 3934 mc->igmp_joined = false; 3935 + mc->join_state = join_state; 4020 3936 spin_lock(&id_priv->lock); 4021 3937 list_add(&mc->list, &id_priv->mc_list); 4022 3938 spin_unlock(&id_priv->lock);
+2 -1
drivers/infiniband/core/iwpm_util.c
··· 37 37 #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) 38 38 #define IWPM_REMINFO_HASH_SIZE 64 39 39 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) 40 + #define IWPM_MSG_SIZE 512 40 41 41 42 static LIST_HEAD(iwpm_nlmsg_req_list); 42 43 static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); ··· 453 452 { 454 453 struct sk_buff *skb = NULL; 455 454 456 - skb = dev_alloc_skb(NLMSG_GOODSIZE); 455 + skb = dev_alloc_skb(IWPM_MSG_SIZE); 457 456 if (!skb) { 458 457 pr_err("%s Unable to allocate skb\n", __func__); 459 458 goto create_nlmsg_exit;
-12
drivers/infiniband/core/multicast.c
··· 93 93 94 94 struct mcast_member; 95 95 96 - /* 97 - * There are 4 types of join states: 98 - * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. 99 - */ 100 - enum { 101 - FULLMEMBER_JOIN, 102 - NONMEMBER_JOIN, 103 - SENDONLY_NONMEBER_JOIN, 104 - SENDONLY_FULLMEMBER_JOIN, 105 - NUM_JOIN_MEMBERSHIP_TYPES, 106 - }; 107 - 108 96 struct mcast_group { 109 97 struct ib_sa_mcmember_rec rec; 110 98 struct rb_node node;
+41
drivers/infiniband/core/sa_query.c
··· 65 65 u8 src_path_mask; 66 66 }; 67 67 68 + struct ib_sa_classport_cache { 69 + bool valid; 70 + struct ib_class_port_info data; 71 + }; 72 + 68 73 struct ib_sa_port { 69 74 struct ib_mad_agent *agent; 70 75 struct ib_sa_sm_ah *sm_ah; 71 76 struct work_struct update_task; 77 + struct ib_sa_classport_cache classport_info; 78 + spinlock_t classport_lock; /* protects class port info set */ 72 79 spinlock_t ah_lock; 73 80 u8 port_num; 74 81 }; ··· 1005 998 port->sm_ah = NULL; 1006 999 spin_unlock_irqrestore(&port->ah_lock, flags); 1007 1000 1001 + if (event->event == IB_EVENT_SM_CHANGE || 1002 + event->event == IB_EVENT_CLIENT_REREGISTER || 1003 + event->event == IB_EVENT_LID_CHANGE) { 1004 + spin_lock_irqsave(&port->classport_lock, flags); 1005 + port->classport_info.valid = false; 1006 + spin_unlock_irqrestore(&port->classport_lock, flags); 1007 + } 1008 1008 queue_work(ib_wq, &sa_dev->port[event->element.port_num - 1009 1009 sa_dev->start_port].update_task); 1010 1010 } ··· 1733 1719 int status, 1734 1720 struct ib_sa_mad *mad) 1735 1721 { 1722 + unsigned long flags; 1736 1723 struct ib_sa_classport_info_query *query = 1737 1724 container_of(sa_query, struct ib_sa_classport_info_query, sa_query); 1738 1725 ··· 1743 1728 ib_unpack(classport_info_rec_table, 1744 1729 ARRAY_SIZE(classport_info_rec_table), 1745 1730 mad->data, &rec); 1731 + 1732 + spin_lock_irqsave(&sa_query->port->classport_lock, flags); 1733 + if (!status && !sa_query->port->classport_info.valid) { 1734 + memcpy(&sa_query->port->classport_info.data, &rec, 1735 + sizeof(sa_query->port->classport_info.data)); 1736 + 1737 + sa_query->port->classport_info.valid = true; 1738 + } 1739 + spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); 1740 + 1746 1741 query->callback(status, &rec, query->context); 1747 1742 } else { 1748 1743 query->callback(status, NULL, query->context); ··· 1779 1754 struct ib_sa_port *port; 1780 1755 struct ib_mad_agent *agent; 1781 1756 struct ib_sa_mad *mad; 1757 + 
struct ib_class_port_info cached_class_port_info; 1782 1758 int ret; 1759 + unsigned long flags; 1783 1760 1784 1761 if (!sa_dev) 1785 1762 return -ENODEV; 1786 1763 1787 1764 port = &sa_dev->port[port_num - sa_dev->start_port]; 1788 1765 agent = port->agent; 1766 + 1767 + /* Use cached ClassPortInfo attribute if valid instead of sending mad */ 1768 + spin_lock_irqsave(&port->classport_lock, flags); 1769 + if (port->classport_info.valid && callback) { 1770 + memcpy(&cached_class_port_info, &port->classport_info.data, 1771 + sizeof(cached_class_port_info)); 1772 + spin_unlock_irqrestore(&port->classport_lock, flags); 1773 + callback(0, &cached_class_port_info, context); 1774 + return 0; 1775 + } 1776 + spin_unlock_irqrestore(&port->classport_lock, flags); 1789 1777 1790 1778 query = kzalloc(sizeof(*query), gfp_mask); 1791 1779 if (!query) ··· 1922 1884 1923 1885 sa_dev->port[i].sm_ah = NULL; 1924 1886 sa_dev->port[i].port_num = i + s; 1887 + 1888 + spin_lock_init(&sa_dev->port[i].classport_lock); 1889 + sa_dev->port[i].classport_info.valid = false; 1925 1890 1926 1891 sa_dev->port[i].agent = 1927 1892 ib_register_mad_agent(device, i + s, IB_QPT_GSI,
+14 -4
drivers/infiniband/core/ucma.c
··· 106 106 int events_reported; 107 107 108 108 u64 uid; 109 + u8 join_state; 109 110 struct list_head list; 110 111 struct sockaddr_storage addr; 111 112 }; ··· 1318 1317 struct ucma_multicast *mc; 1319 1318 struct sockaddr *addr; 1320 1319 int ret; 1320 + u8 join_state; 1321 1321 1322 1322 if (out_len < sizeof(resp)) 1323 1323 return -ENOSPC; 1324 1324 1325 1325 addr = (struct sockaddr *) &cmd->addr; 1326 - if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) 1326 + if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) 1327 + return -EINVAL; 1328 + 1329 + if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) 1330 + join_state = BIT(FULLMEMBER_JOIN); 1331 + else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) 1332 + join_state = BIT(SENDONLY_FULLMEMBER_JOIN); 1333 + else 1327 1334 return -EINVAL; 1328 1335 1329 1336 ctx = ucma_get_ctx(file, cmd->id); ··· 1344 1335 ret = -ENOMEM; 1345 1336 goto err1; 1346 1337 } 1347 - 1338 + mc->join_state = join_state; 1348 1339 mc->uid = cmd->uid; 1349 1340 memcpy(&mc->addr, addr, cmd->addr_size); 1350 - ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); 1341 + ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, 1342 + join_state, mc); 1351 1343 if (ret) 1352 1344 goto err2; 1353 1345 ··· 1392 1382 join_cmd.uid = cmd.uid; 1393 1383 join_cmd.id = cmd.id; 1394 1384 join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); 1395 - join_cmd.reserved = 0; 1385 + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; 1396 1386 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); 1397 1387 1398 1388 return ucma_process_join(file, &join_cmd, out_len);
+1
drivers/infiniband/core/uverbs.h
··· 116 116 struct ib_uverbs_file { 117 117 struct kref ref; 118 118 struct mutex mutex; 119 + struct mutex cleanup_mutex; /* protect cleanup */ 119 120 struct ib_uverbs_device *device; 120 121 struct ib_ucontext *ucontext; 121 122 struct ib_event_handler event_handler;
+24 -13
drivers/infiniband/core/uverbs_main.c
··· 969 969 file->async_file = NULL; 970 970 kref_init(&file->ref); 971 971 mutex_init(&file->mutex); 972 + mutex_init(&file->cleanup_mutex); 972 973 973 974 filp->private_data = file; 974 975 kobject_get(&dev->kobj); ··· 995 994 { 996 995 struct ib_uverbs_file *file = filp->private_data; 997 996 struct ib_uverbs_device *dev = file->device; 998 - struct ib_ucontext *ucontext = NULL; 997 + 998 + mutex_lock(&file->cleanup_mutex); 999 + if (file->ucontext) { 1000 + ib_uverbs_cleanup_ucontext(file, file->ucontext); 1001 + file->ucontext = NULL; 1002 + } 1003 + mutex_unlock(&file->cleanup_mutex); 999 1004 1000 1005 mutex_lock(&file->device->lists_mutex); 1001 - ucontext = file->ucontext; 1002 - file->ucontext = NULL; 1003 1006 if (!file->is_closed) { 1004 1007 list_del(&file->list); 1005 1008 file->is_closed = 1; 1006 1009 } 1007 1010 mutex_unlock(&file->device->lists_mutex); 1008 - if (ucontext) 1009 - ib_uverbs_cleanup_ucontext(file, ucontext); 1010 1011 1011 1012 if (file->async_file) 1012 1013 kref_put(&file->async_file->ref, ib_uverbs_release_event_file); ··· 1222 1219 mutex_lock(&uverbs_dev->lists_mutex); 1223 1220 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1224 1221 struct ib_ucontext *ucontext; 1225 - 1226 1222 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1227 1223 struct ib_uverbs_file, list); 1228 1224 file->is_closed = 1; 1229 - ucontext = file->ucontext; 1230 1225 list_del(&file->list); 1231 - file->ucontext = NULL; 1232 1226 kref_get(&file->ref); 1233 1227 mutex_unlock(&uverbs_dev->lists_mutex); 1234 - /* We must release the mutex before going ahead and calling 1235 - * disassociate_ucontext. disassociate_ucontext might end up 1236 - * indirectly calling uverbs_close, for example due to freeing 1237 - * the resources (e.g mmput). 
1238 - */ 1228 + 1239 1229 ib_uverbs_event_handler(&file->event_handler, &event); 1230 + 1231 + mutex_lock(&file->cleanup_mutex); 1232 + ucontext = file->ucontext; 1233 + file->ucontext = NULL; 1234 + mutex_unlock(&file->cleanup_mutex); 1235 + 1236 + /* At this point ib_uverbs_close cannot be running 1237 + * ib_uverbs_cleanup_ucontext 1238 + */ 1240 1239 if (ucontext) { 1240 + /* We must release the mutex before going ahead and 1241 + * calling disassociate_ucontext. disassociate_ucontext 1242 + * might end up indirectly calling uverbs_close, 1243 + * for example due to freeing the resources 1244 + * (e.g mmput). 1245 + */ 1241 1246 ib_dev->disassociate_ucontext(ucontext); 1242 1247 ib_uverbs_cleanup_ucontext(file, ucontext); 1243 1248 }
-1
drivers/infiniband/hw/hfi1/Kconfig
··· 3 3 depends on X86_64 && INFINIBAND_RDMAVT 4 4 select MMU_NOTIFIER 5 5 select CRC32 6 - default m 7 6 ---help--- 8 7 This is a low-level driver for Intel OPA Gen1 adapter. 9 8 config HFI1_DEBUG_SDMA_ORDER
+1 -1
drivers/infiniband/hw/hfi1/file_ops.c
··· 225 225 sizeof(struct hfi1_base_info)); 226 226 break; 227 227 case HFI1_IOCTL_CREDIT_UPD: 228 - if (uctxt && uctxt->sc) 228 + if (uctxt) 229 229 sc_return_credits(uctxt->sc); 230 230 break; 231 231
+2 -2
drivers/infiniband/hw/mlx4/cq.c
··· 288 288 if (cq->resize_buf) 289 289 return -EBUSY; 290 290 291 - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 291 + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); 292 292 if (!cq->resize_buf) 293 293 return -ENOMEM; 294 294 ··· 316 316 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) 317 317 return -EFAULT; 318 318 319 - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 319 + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); 320 320 if (!cq->resize_buf) 321 321 return -ENOMEM; 322 322
+197 -1
drivers/infiniband/hw/mlx4/main.c
··· 2049 2049 &dev_attr_board_id 2050 2050 }; 2051 2051 2052 + struct diag_counter { 2053 + const char *name; 2054 + u32 offset; 2055 + }; 2056 + 2057 + #define DIAG_COUNTER(_name, _offset) \ 2058 + { .name = #_name, .offset = _offset } 2059 + 2060 + static const struct diag_counter diag_basic[] = { 2061 + DIAG_COUNTER(rq_num_lle, 0x00), 2062 + DIAG_COUNTER(sq_num_lle, 0x04), 2063 + DIAG_COUNTER(rq_num_lqpoe, 0x08), 2064 + DIAG_COUNTER(sq_num_lqpoe, 0x0C), 2065 + DIAG_COUNTER(rq_num_lpe, 0x18), 2066 + DIAG_COUNTER(sq_num_lpe, 0x1C), 2067 + DIAG_COUNTER(rq_num_wrfe, 0x20), 2068 + DIAG_COUNTER(sq_num_wrfe, 0x24), 2069 + DIAG_COUNTER(sq_num_mwbe, 0x2C), 2070 + DIAG_COUNTER(sq_num_bre, 0x34), 2071 + DIAG_COUNTER(sq_num_rire, 0x44), 2072 + DIAG_COUNTER(rq_num_rire, 0x48), 2073 + DIAG_COUNTER(sq_num_rae, 0x4C), 2074 + DIAG_COUNTER(rq_num_rae, 0x50), 2075 + DIAG_COUNTER(sq_num_roe, 0x54), 2076 + DIAG_COUNTER(sq_num_tree, 0x5C), 2077 + DIAG_COUNTER(sq_num_rree, 0x64), 2078 + DIAG_COUNTER(rq_num_rnr, 0x68), 2079 + DIAG_COUNTER(sq_num_rnr, 0x6C), 2080 + DIAG_COUNTER(rq_num_oos, 0x100), 2081 + DIAG_COUNTER(sq_num_oos, 0x104), 2082 + }; 2083 + 2084 + static const struct diag_counter diag_ext[] = { 2085 + DIAG_COUNTER(rq_num_dup, 0x130), 2086 + DIAG_COUNTER(sq_num_to, 0x134), 2087 + }; 2088 + 2089 + static const struct diag_counter diag_device_only[] = { 2090 + DIAG_COUNTER(num_cqovf, 0x1A0), 2091 + DIAG_COUNTER(rq_num_udsdprd, 0x118), 2092 + }; 2093 + 2094 + static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev, 2095 + u8 port_num) 2096 + { 2097 + struct mlx4_ib_dev *dev = to_mdev(ibdev); 2098 + struct mlx4_ib_diag_counters *diag = dev->diag_counters; 2099 + 2100 + if (!diag[!!port_num].name) 2101 + return NULL; 2102 + 2103 + return rdma_alloc_hw_stats_struct(diag[!!port_num].name, 2104 + diag[!!port_num].num_counters, 2105 + RDMA_HW_STATS_DEFAULT_LIFESPAN); 2106 + } 2107 + 2108 + static int mlx4_ib_get_hw_stats(struct ib_device *ibdev, 2109 + struct 
rdma_hw_stats *stats, 2110 + u8 port, int index) 2111 + { 2112 + struct mlx4_ib_dev *dev = to_mdev(ibdev); 2113 + struct mlx4_ib_diag_counters *diag = dev->diag_counters; 2114 + u32 hw_value[ARRAY_SIZE(diag_device_only) + 2115 + ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {}; 2116 + int ret; 2117 + int i; 2118 + 2119 + ret = mlx4_query_diag_counters(dev->dev, 2120 + MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS, 2121 + diag[!!port].offset, hw_value, 2122 + diag[!!port].num_counters, port); 2123 + 2124 + if (ret) 2125 + return ret; 2126 + 2127 + for (i = 0; i < diag[!!port].num_counters; i++) 2128 + stats->value[i] = hw_value[i]; 2129 + 2130 + return diag[!!port].num_counters; 2131 + } 2132 + 2133 + static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, 2134 + const char ***name, 2135 + u32 **offset, 2136 + u32 *num, 2137 + bool port) 2138 + { 2139 + u32 num_counters; 2140 + 2141 + num_counters = ARRAY_SIZE(diag_basic); 2142 + 2143 + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) 2144 + num_counters += ARRAY_SIZE(diag_ext); 2145 + 2146 + if (!port) 2147 + num_counters += ARRAY_SIZE(diag_device_only); 2148 + 2149 + *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); 2150 + if (!*name) 2151 + return -ENOMEM; 2152 + 2153 + *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); 2154 + if (!*offset) 2155 + goto err_name; 2156 + 2157 + *num = num_counters; 2158 + 2159 + return 0; 2160 + 2161 + err_name: 2162 + kfree(*name); 2163 + return -ENOMEM; 2164 + } 2165 + 2166 + static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, 2167 + const char **name, 2168 + u32 *offset, 2169 + bool port) 2170 + { 2171 + int i; 2172 + int j; 2173 + 2174 + for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { 2175 + name[i] = diag_basic[i].name; 2176 + offset[i] = diag_basic[i].offset; 2177 + } 2178 + 2179 + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { 2180 + for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { 2181 + 
name[j] = diag_ext[i].name; 2182 + offset[j] = diag_ext[i].offset; 2183 + } 2184 + } 2185 + 2186 + if (!port) { 2187 + for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { 2188 + name[j] = diag_device_only[i].name; 2189 + offset[j] = diag_device_only[i].offset; 2190 + } 2191 + } 2192 + } 2193 + 2194 + static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) 2195 + { 2196 + struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; 2197 + int i; 2198 + int ret; 2199 + bool per_port = !!(ibdev->dev->caps.flags2 & 2200 + MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); 2201 + 2202 + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { 2203 + /* i == 1 means we are building port counters */ 2204 + if (i && !per_port) 2205 + continue; 2206 + 2207 + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, 2208 + &diag[i].offset, 2209 + &diag[i].num_counters, i); 2210 + if (ret) 2211 + goto err_alloc; 2212 + 2213 + mlx4_ib_fill_diag_counters(ibdev, diag[i].name, 2214 + diag[i].offset, i); 2215 + } 2216 + 2217 + ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; 2218 + ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; 2219 + 2220 + return 0; 2221 + 2222 + err_alloc: 2223 + if (i) { 2224 + kfree(diag[i - 1].name); 2225 + kfree(diag[i - 1].offset); 2226 + } 2227 + 2228 + return ret; 2229 + } 2230 + 2231 + static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev) 2232 + { 2233 + int i; 2234 + 2235 + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { 2236 + kfree(ibdev->diag_counters[i].offset); 2237 + kfree(ibdev->diag_counters[i].name); 2238 + } 2239 + } 2240 + 2052 2241 #define MLX4_IB_INVALID_MAC ((u64)-1) 2053 2242 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, 2054 2243 struct net_device *dev, ··· 2741 2552 for (j = 1; j <= ibdev->dev->caps.num_ports; j++) 2742 2553 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); 2743 2554 2744 - if (ib_register_device(&ibdev->ib_dev, NULL)) 2555 + if (mlx4_ib_alloc_diag_counters(ibdev)) 2745 2556 goto 
err_steer_free_bitmap; 2557 + 2558 + if (ib_register_device(&ibdev->ib_dev, NULL)) 2559 + goto err_diag_counters; 2746 2560 2747 2561 if (mlx4_ib_mad_init(ibdev)) 2748 2562 goto err_reg; ··· 2811 2619 2812 2620 err_reg: 2813 2621 ib_unregister_device(&ibdev->ib_dev); 2622 + 2623 + err_diag_counters: 2624 + mlx4_ib_diag_cleanup(ibdev); 2814 2625 2815 2626 err_steer_free_bitmap: 2816 2627 kfree(ibdev->ib_uc_qpns_bitmap); ··· 2918 2723 mlx4_ib_close_sriov(ibdev); 2919 2724 mlx4_ib_mad_cleanup(ibdev); 2920 2725 ib_unregister_device(&ibdev->ib_dev); 2726 + mlx4_ib_diag_cleanup(ibdev); 2921 2727 if (ibdev->iboe.nb.notifier_call) { 2922 2728 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 2923 2729 pr_warn("failure unregistering notifier\n");
+9
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 549 549 u32 default_counter; 550 550 }; 551 551 552 + #define MLX4_DIAG_COUNTERS_TYPES 2 553 + 554 + struct mlx4_ib_diag_counters { 555 + const char **name; 556 + u32 *offset; 557 + u32 num_counters; 558 + }; 559 + 552 560 struct mlx4_ib_dev { 553 561 struct ib_device ib_dev; 554 562 struct mlx4_dev *dev; ··· 593 585 /* protect resources needed as part of reset flow */ 594 586 spinlock_t reset_flow_resource_lock; 595 587 struct list_head qp_list; 588 + struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES]; 596 589 }; 597 590 598 591 struct ib_event_work {
+20 -22
drivers/infiniband/hw/mthca/mthca_reset.c
··· 98 98 err = -ENOMEM; 99 99 mthca_err(mdev, "Couldn't allocate memory to save HCA " 100 100 "PCI header, aborting.\n"); 101 - goto out; 101 + goto put_dev; 102 102 } 103 103 104 104 for (i = 0; i < 64; ++i) { ··· 108 108 err = -ENODEV; 109 109 mthca_err(mdev, "Couldn't save HCA " 110 110 "PCI header, aborting.\n"); 111 - goto out; 111 + goto free_hca; 112 112 } 113 113 } 114 114 ··· 121 121 err = -ENOMEM; 122 122 mthca_err(mdev, "Couldn't allocate memory to save HCA " 123 123 "bridge PCI header, aborting.\n"); 124 - goto out; 124 + goto free_hca; 125 125 } 126 126 127 127 for (i = 0; i < 64; ++i) { ··· 131 131 err = -ENODEV; 132 132 mthca_err(mdev, "Couldn't save HCA bridge " 133 133 "PCI header, aborting.\n"); 134 - goto out; 134 + goto free_bh; 135 135 } 136 136 } 137 137 bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); ··· 139 139 err = -ENODEV; 140 140 mthca_err(mdev, "Couldn't locate HCA bridge " 141 141 "PCI-X capability, aborting.\n"); 142 - goto out; 142 + goto free_bh; 143 143 } 144 144 } 145 145 ··· 152 152 err = -ENOMEM; 153 153 mthca_err(mdev, "Couldn't map HCA reset register, " 154 154 "aborting.\n"); 155 - goto out; 155 + goto free_bh; 156 156 } 157 157 158 158 writel(MTHCA_RESET_VALUE, reset); ··· 172 172 err = -ENODEV; 173 173 mthca_err(mdev, "Couldn't access HCA after reset, " 174 174 "aborting.\n"); 175 - goto out; 175 + goto free_bh; 176 176 } 177 177 178 178 if (v != 0xffffffff) ··· 184 184 err = -ENODEV; 185 185 mthca_err(mdev, "PCI device did not come back after reset, " 186 186 "aborting.\n"); 187 - goto out; 187 + goto free_bh; 188 188 } 189 189 190 190 good: ··· 195 195 err = -ENODEV; 196 196 mthca_err(mdev, "Couldn't restore HCA bridge Upstream " 197 197 "split transaction control, aborting.\n"); 198 - goto out; 198 + goto free_bh; 199 199 } 200 200 if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, 201 201 bridge_header[(bridge_pcix_cap + 0xc) / 4])) { 202 202 err = -ENODEV; 203 203 mthca_err(mdev, "Couldn't 
restore HCA bridge Downstream " 204 204 "split transaction control, aborting.\n"); 205 - goto out; 205 + goto free_bh; 206 206 } 207 207 /* 208 208 * Bridge control register is at 0x3e, so we'll ··· 216 216 err = -ENODEV; 217 217 mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " 218 218 "aborting.\n", i); 219 - goto out; 219 + goto free_bh; 220 220 } 221 221 } 222 222 ··· 225 225 err = -ENODEV; 226 226 mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " 227 227 "aborting.\n"); 228 - goto out; 228 + goto free_bh; 229 229 } 230 230 } 231 231 ··· 235 235 err = -ENODEV; 236 236 mthca_err(mdev, "Couldn't restore HCA PCI-X " 237 237 "command register, aborting.\n"); 238 - goto out; 238 + goto free_bh; 239 239 } 240 240 } 241 241 ··· 246 246 err = -ENODEV; 247 247 mthca_err(mdev, "Couldn't restore HCA PCI Express " 248 248 "Device Control register, aborting.\n"); 249 - goto out; 249 + goto free_bh; 250 250 } 251 251 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; 252 252 if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, ··· 254 254 err = -ENODEV; 255 255 mthca_err(mdev, "Couldn't restore HCA PCI Express " 256 256 "Link control register, aborting.\n"); 257 - goto out; 257 + goto free_bh; 258 258 } 259 259 } 260 260 ··· 266 266 err = -ENODEV; 267 267 mthca_err(mdev, "Couldn't restore HCA reg %x, " 268 268 "aborting.\n", i); 269 - goto out; 269 + goto free_bh; 270 270 } 271 271 } 272 272 ··· 275 275 err = -ENODEV; 276 276 mthca_err(mdev, "Couldn't restore HCA COMMAND, " 277 277 "aborting.\n"); 278 - goto out; 279 278 } 280 - 281 - out: 282 - if (bridge) 283 - pci_dev_put(bridge); 279 + free_bh: 284 280 kfree(bridge_header); 281 + free_hca: 285 282 kfree(hca_header); 286 - 283 + put_dev: 284 + pci_dev_put(bridge); 287 285 return err; 288 286 }
+1
drivers/infiniband/sw/Makefile
··· 1 1 obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ 2 + obj-$(CONFIG_RDMA_RXE) += rxe/
-1
drivers/infiniband/sw/rdmavt/Kconfig
··· 1 1 config INFINIBAND_RDMAVT 2 2 tristate "RDMA verbs transport library" 3 3 depends on 64BIT 4 - default m 5 4 ---help--- 6 5 This is a common software verbs provider for RDMA networks.
+24
drivers/infiniband/sw/rxe/Kconfig
··· 1 + config RDMA_RXE 2 + tristate "Software RDMA over Ethernet (RoCE) driver" 3 + depends on INET && PCI && INFINIBAND 4 + depends on NET_UDP_TUNNEL 5 + ---help--- 6 + This driver implements the InfiniBand RDMA transport over 7 + the Linux network stack. It enables a system with a 8 + standard Ethernet adapter to interoperate with a RoCE 9 + adapter or with another system running the RXE driver. 10 + Documentation on InfiniBand and RoCE can be downloaded at 11 + www.infinibandta.org and www.openfabrics.org. (See also 12 + siw which is a similar software driver for iWARP.) 13 + 14 + The driver is split into two layers, one interfaces with the 15 + Linux RDMA stack and implements a kernel or user space 16 + verbs API. The user space verbs API requires a support 17 + library named librxe which is loaded by the generic user 18 + space verbs API, libibverbs. The other layer interfaces 19 + with the Linux network stack at layer 3. 20 + 21 + To configure and work with soft-RoCE driver please use the 22 + following wiki page under "configure Soft-RoCE (RXE)" section: 23 + 24 + https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+24
drivers/infiniband/sw/rxe/Makefile
··· 1 + obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o 2 + 3 + rdma_rxe-y := \ 4 + rxe.o \ 5 + rxe_comp.o \ 6 + rxe_req.o \ 7 + rxe_resp.o \ 8 + rxe_recv.o \ 9 + rxe_pool.o \ 10 + rxe_queue.o \ 11 + rxe_verbs.o \ 12 + rxe_av.o \ 13 + rxe_srq.o \ 14 + rxe_qp.o \ 15 + rxe_cq.o \ 16 + rxe_mr.o \ 17 + rxe_dma.o \ 18 + rxe_opcode.o \ 19 + rxe_mmap.o \ 20 + rxe_icrc.o \ 21 + rxe_mcast.o \ 22 + rxe_task.o \ 23 + rxe_net.o \ 24 + rxe_sysfs.o
+386
drivers/infiniband/sw/rxe/rxe.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
 */

#include "rxe.h"
#include "rxe_loc.h"

MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION("0.2");

/* free resources for all ports on a device */
static void rxe_cleanup_ports(struct rxe_dev *rxe)
{
	kfree(rxe->port.pkey_tbl);
	rxe->port.pkey_tbl = NULL;

}

/* free resources for a rxe device; all objects created for this device
 * must have been destroyed
 */
static void rxe_cleanup(struct rxe_dev *rxe)
{
	rxe_pool_cleanup(&rxe->uc_pool);
	rxe_pool_cleanup(&rxe->pd_pool);
	rxe_pool_cleanup(&rxe->ah_pool);
	rxe_pool_cleanup(&rxe->srq_pool);
	rxe_pool_cleanup(&rxe->qp_pool);
	rxe_pool_cleanup(&rxe->cq_pool);
	rxe_pool_cleanup(&rxe->mr_pool);
	rxe_pool_cleanup(&rxe->mw_pool);
	rxe_pool_cleanup(&rxe->mc_grp_pool);
	rxe_pool_cleanup(&rxe->mc_elem_pool);

	rxe_cleanup_ports(rxe);
}

/* called when all references have been dropped: tear down driver state and
 * free the ib_device allocation itself
 */
void rxe_release(struct kref *kref)
{
	struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);

	rxe_cleanup(rxe);
	ib_dealloc_device(&rxe->ib_dev);
}

/* drop one reference to the device; the final put invokes rxe_release() */
void rxe_dev_put(struct rxe_dev *rxe)
{
	kref_put(&rxe->ref_cnt, rxe_release);
}
EXPORT_SYMBOL_GPL(rxe_dev_put);

/* initialize rxe device parameters (reported via ib_query_device) from the
 * compile-time limits in rxe_param.h; always returns 0
 */
static int rxe_init_device_param(struct rxe_dev *rxe)
{
	rxe->max_inline_data = RXE_MAX_INLINE_DATA;

	rxe->attr.fw_ver = RXE_FW_VER;
	rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;
	rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
	rxe->attr.vendor_id = RXE_VENDOR_ID;
	rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID;
	rxe->attr.hw_ver = RXE_HW_VER;
	rxe->attr.max_qp = RXE_MAX_QP;
	rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
	rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
	rxe->attr.max_sge = RXE_MAX_SGE;
	rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
	rxe->attr.max_cq = RXE_MAX_CQ;
	rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
	rxe->attr.max_mr = RXE_MAX_MR;
	rxe->attr.max_pd = RXE_MAX_PD;
	rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;
	rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM;
	rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
	rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
	rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
	rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
	rxe->attr.max_ee = RXE_MAX_EE;
	rxe->attr.max_rdd = RXE_MAX_RDD;
	rxe->attr.max_mw = RXE_MAX_MW;
	rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP;
	rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP;
	rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP;
	rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH;
	rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH;
	rxe->attr.max_ah = RXE_MAX_AH;
	rxe->attr.max_fmr = RXE_MAX_FMR;
	rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR;
	rxe->attr.max_srq = RXE_MAX_SRQ;
	rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR;
	rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE;
	rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
	rxe->attr.max_pkeys = RXE_MAX_PKEYS;
	rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;

	rxe->max_ucontext = RXE_MAX_UCONTEXT;

	return 0;
}

/* initialize port attributes from compile-time defaults; always returns 0 */
static int rxe_init_port_param(struct rxe_port *port)
{
	port->attr.state = RXE_PORT_STATE;
	port->attr.max_mtu = RXE_PORT_MAX_MTU;
	port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
	port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
	port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
	port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
	port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR;
	port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR;
	port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN;
	port->attr.lid = RXE_PORT_LID;
	port->attr.sm_lid = RXE_PORT_SM_LID;
	port->attr.lmc = RXE_PORT_LMC;
	port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM;
	port->attr.sm_sl = RXE_PORT_SM_SL;
	port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT;
	port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY;
	port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
	port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
	port->attr.phys_state = RXE_PORT_PHYS_STATE;
	port->mtu_cap =
		ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
	port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);

	return 0;
}

/* initialize port state, note IB convention that HCA ports are always
 * numbered from 1
 */
static int rxe_init_ports(struct rxe_dev *rxe)
{
	struct rxe_port *port = &rxe->port;

	rxe_init_port_param(port);

	if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)
		return -EINVAL;

	port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,
			sizeof(*port->pkey_tbl), GFP_KERNEL);

	if (!port->pkey_tbl)
		return -ENOMEM;

	/* slot 0 holds the default partition key */
	port->pkey_tbl[0] = 0xffff;
	/* port GUID is derived from the underlying interface by the ifc ops */
	port->port_guid = rxe->ifc_ops->port_guid(rxe);

	spin_lock_init(&port->port_lock);

	return 0;
}

/* init pools of managed objects; on failure all pools initialized so far are
 * torn down in reverse order before returning the error
 */
static int rxe_init_pools(struct rxe_dev *rxe)
{
	int err;

	err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
			    rxe->max_ucontext);
	if (err)
		goto err1;

	err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
			    rxe->attr.max_pd);
	if (err)
		goto err2;

	err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
			    rxe->attr.max_ah);
	if (err)
		goto err3;

	err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
			    rxe->attr.max_srq);
	if (err)
		goto err4;

	err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
			    rxe->attr.max_qp);
	if (err)
		goto err5;

	err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
			    rxe->attr.max_cq);
	if (err)
		goto err6;

	err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
			    rxe->attr.max_mr);
	if (err)
		goto err7;

	err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
			    rxe->attr.max_mw);
	if (err)
		goto err8;

	err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
			    rxe->attr.max_mcast_grp);
	if (err)
		goto err9;

	err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
			    rxe->attr.max_total_mcast_qp_attach);
	if (err)
		goto err10;

	return 0;

err10:
	rxe_pool_cleanup(&rxe->mc_grp_pool);
err9:
	rxe_pool_cleanup(&rxe->mw_pool);
err8:
	rxe_pool_cleanup(&rxe->mr_pool);
err7:
	rxe_pool_cleanup(&rxe->cq_pool);
err6:
	rxe_pool_cleanup(&rxe->qp_pool);
err5:
	rxe_pool_cleanup(&rxe->srq_pool);
err4:
	rxe_pool_cleanup(&rxe->ah_pool);
err3:
	rxe_pool_cleanup(&rxe->pd_pool);
err2:
	rxe_pool_cleanup(&rxe->uc_pool);
err1:
	return err;
}

/* initialize rxe device state: device params, ports, object pools, mmap
 * bookkeeping and locks; returns 0 or a negative errno
 */
static int rxe_init(struct rxe_dev *rxe)
{
	int err;

	/* init default device parameters */
	rxe_init_device_param(rxe);

	err = rxe_init_ports(rxe);
	if (err)
		goto err1;

	err = rxe_init_pools(rxe);
	if (err)
		goto err2;

	/* init pending mmap list */
	spin_lock_init(&rxe->mmap_offset_lock);
	spin_lock_init(&rxe->pending_lock);
	INIT_LIST_HEAD(&rxe->pending_mmaps);
	INIT_LIST_HEAD(&rxe->list);

	mutex_init(&rxe->usdev_lock);

	return 0;

err2:
	rxe_cleanup_ports(rxe);
err1:
	return err;
}

/* translate a netdev MTU change to the IB port's active MTU, clamped to the
 * supported range; always returns 0
 */
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
{
	struct rxe_port *port = &rxe->port;
	enum ib_mtu mtu;

	mtu = eth_mtu_int_to_enum(ndev_mtu);

	/* Make sure that new MTU in range */
	mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;

	port->attr.active_mtu = mtu;
	port->mtu_cap = ib_mtu_enum_to_int(mtu);

	return 0;
}
EXPORT_SYMBOL(rxe_set_mtu);

/* called by ifc layer to create new rxe device.
 * The caller should allocate memory for rxe by calling ib_alloc_device.
 *
 * NOTE(review): on any failure rxe_dev_put() drops the initial reference,
 * which runs rxe_cleanup() on a possibly partially-initialized device; this
 * relies on rxe_pool_cleanup()/kfree() being safe on zeroed state from
 * ib_alloc_device() - confirm.
 */
int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
{
	int err;

	kref_init(&rxe->ref_cnt);

	err = rxe_init(rxe);
	if (err)
		goto err1;

	err = rxe_set_mtu(rxe, mtu);
	if (err)
		goto err1;

	err = rxe_register_device(rxe);
	if (err)
		goto err1;

	return 0;

err1:
	rxe_dev_put(rxe);
	return err;
}
EXPORT_SYMBOL(rxe_add);

/* called by the ifc layer to remove a device: unregister from the IB core,
 * then drop the initial reference
 */
void rxe_remove(struct rxe_dev *rxe)
{
	rxe_unregister_device(rxe);

	rxe_dev_put(rxe);
}
EXPORT_SYMBOL(rxe_remove);

/* module load: set up object slab caches, then the net layer */
static int __init rxe_module_init(void)
{
	int err;

	/* initialize slab caches for managed objects */
	err = rxe_cache_init();
	if (err) {
		pr_err("rxe: unable to init object pools\n");
		return err;
	}

	err = rxe_net_init();
	if (err) {
		pr_err("rxe: unable to init\n");
		rxe_cache_exit();
		return err;
	}
	pr_info("rxe: loaded\n");

	return 0;
}

/* module unload: remove all devices, then tear down net layer and caches */
static void __exit rxe_module_exit(void)
{
	rxe_remove_all();
	rxe_net_exit();
	rxe_cache_exit();

	pr_info("rxe: unloaded\n");
}

module_init(rxe_module_init);
module_exit(rxe_module_exit);
+77
drivers/infiniband/sw/rxe/rxe.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #ifndef RXE_H 35 + #define RXE_H 36 + 37 + #include <linux/module.h> 38 + #include <linux/skbuff.h> 39 + #include <linux/crc32.h> 40 + 41 + #include <rdma/ib_verbs.h> 42 + #include <rdma/ib_user_verbs.h> 43 + #include <rdma/ib_pack.h> 44 + #include <rdma/ib_smi.h> 45 + #include <rdma/ib_umem.h> 46 + #include <rdma/ib_cache.h> 47 + #include <rdma/ib_addr.h> 48 + 49 + #include "rxe_net.h" 50 + #include "rxe_opcode.h" 51 + #include "rxe_hdr.h" 52 + #include "rxe_param.h" 53 + #include "rxe_verbs.h" 54 + 55 + #define RXE_UVERBS_ABI_VERSION (1) 56 + 57 + #define IB_PHYS_STATE_LINK_UP (5) 58 + #define IB_PHYS_STATE_LINK_DOWN (3) 59 + 60 + #define RXE_ROCE_V2_SPORT (0xc000) 61 + 62 + int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); 63 + 64 + int rxe_add(struct rxe_dev *rxe, unsigned int mtu); 65 + void rxe_remove(struct rxe_dev *rxe); 66 + void rxe_remove_all(void); 67 + 68 + int rxe_rcv(struct sk_buff *skb); 69 + 70 + void rxe_dev_put(struct rxe_dev *rxe); 71 + struct rxe_dev *net_to_rxe(struct net_device *ndev); 72 + struct rxe_dev *get_rxe_by_name(const char* name); 73 + 74 + void rxe_port_up(struct rxe_dev *rxe); 75 + void rxe_port_down(struct rxe_dev *rxe); 76 + 77 + #endif /* RXE_H */
+98
drivers/infiniband/sw/rxe/rxe_av.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) 38 + { 39 + struct rxe_port *port; 40 + 41 + if (attr->port_num != 1) { 42 + pr_info("rxe: invalid port_num = %d\n", attr->port_num); 43 + return -EINVAL; 44 + } 45 + 46 + port = &rxe->port; 47 + 48 + if (attr->ah_flags & IB_AH_GRH) { 49 + if (attr->grh.sgid_index > port->attr.gid_tbl_len) { 50 + pr_info("rxe: invalid sgid index = %d\n", 51 + attr->grh.sgid_index); 52 + return -EINVAL; 53 + } 54 + } 55 + 56 + return 0; 57 + } 58 + 59 + int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, 60 + struct rxe_av *av, struct ib_ah_attr *attr) 61 + { 62 + memset(av, 0, sizeof(*av)); 63 + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); 64 + av->port_num = port_num; 65 + return 0; 66 + } 67 + 68 + int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, 69 + struct ib_ah_attr *attr) 70 + { 71 + memcpy(&attr->grh, &av->grh, sizeof(av->grh)); 72 + attr->port_num = av->port_num; 73 + return 0; 74 + } 75 + 76 + int rxe_av_fill_ip_info(struct rxe_dev *rxe, 77 + struct rxe_av *av, 78 + struct ib_ah_attr *attr, 79 + struct ib_gid_attr *sgid_attr, 80 + union ib_gid *sgid) 81 + { 82 + rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); 83 + rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid); 84 + av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); 85 + 86 + return 0; 87 + } 88 + 89 + struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) 90 + { 91 + if (!pkt || !pkt->qp) 92 + return NULL; 93 + 94 + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) 95 + return &pkt->qp->pri_av; 96 + 97 + return (pkt->wqe) ? &pkt->wqe->av : NULL; 98 + }
+734
drivers/infiniband/sw/rxe/rxe_comp.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/skbuff.h> 35 + 36 + #include "rxe.h" 37 + #include "rxe_loc.h" 38 + #include "rxe_queue.h" 39 + #include "rxe_task.h" 40 + 41 + enum comp_state { 42 + COMPST_GET_ACK, 43 + COMPST_GET_WQE, 44 + COMPST_COMP_WQE, 45 + COMPST_COMP_ACK, 46 + COMPST_CHECK_PSN, 47 + COMPST_CHECK_ACK, 48 + COMPST_READ, 49 + COMPST_ATOMIC, 50 + COMPST_WRITE_SEND, 51 + COMPST_UPDATE_COMP, 52 + COMPST_ERROR_RETRY, 53 + COMPST_RNR_RETRY, 54 + COMPST_ERROR, 55 + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ 56 + COMPST_DONE, /* The completer finished successflly */ 57 + }; 58 + 59 + static char *comp_state_name[] = { 60 + [COMPST_GET_ACK] = "GET ACK", 61 + [COMPST_GET_WQE] = "GET WQE", 62 + [COMPST_COMP_WQE] = "COMP WQE", 63 + [COMPST_COMP_ACK] = "COMP ACK", 64 + [COMPST_CHECK_PSN] = "CHECK PSN", 65 + [COMPST_CHECK_ACK] = "CHECK ACK", 66 + [COMPST_READ] = "READ", 67 + [COMPST_ATOMIC] = "ATOMIC", 68 + [COMPST_WRITE_SEND] = "WRITE/SEND", 69 + [COMPST_UPDATE_COMP] = "UPDATE COMP", 70 + [COMPST_ERROR_RETRY] = "ERROR RETRY", 71 + [COMPST_RNR_RETRY] = "RNR RETRY", 72 + [COMPST_ERROR] = "ERROR", 73 + [COMPST_EXIT] = "EXIT", 74 + [COMPST_DONE] = "DONE", 75 + }; 76 + 77 + static unsigned long rnrnak_usec[32] = { 78 + [IB_RNR_TIMER_655_36] = 655360, 79 + [IB_RNR_TIMER_000_01] = 10, 80 + [IB_RNR_TIMER_000_02] = 20, 81 + [IB_RNR_TIMER_000_03] = 30, 82 + [IB_RNR_TIMER_000_04] = 40, 83 + [IB_RNR_TIMER_000_06] = 60, 84 + [IB_RNR_TIMER_000_08] = 80, 85 + [IB_RNR_TIMER_000_12] = 120, 86 + [IB_RNR_TIMER_000_16] = 160, 87 + [IB_RNR_TIMER_000_24] = 240, 88 + [IB_RNR_TIMER_000_32] = 320, 89 + [IB_RNR_TIMER_000_48] = 480, 90 + [IB_RNR_TIMER_000_64] = 640, 91 + [IB_RNR_TIMER_000_96] = 960, 92 + [IB_RNR_TIMER_001_28] = 1280, 93 + [IB_RNR_TIMER_001_92] = 1920, 94 + [IB_RNR_TIMER_002_56] = 2560, 95 + [IB_RNR_TIMER_003_84] = 3840, 96 + [IB_RNR_TIMER_005_12] = 5120, 97 + [IB_RNR_TIMER_007_68] = 7680, 98 + [IB_RNR_TIMER_010_24] = 10240, 99 + [IB_RNR_TIMER_015_36] 
= 15360, 100 + [IB_RNR_TIMER_020_48] = 20480, 101 + [IB_RNR_TIMER_030_72] = 30720, 102 + [IB_RNR_TIMER_040_96] = 40960, 103 + [IB_RNR_TIMER_061_44] = 61410, 104 + [IB_RNR_TIMER_081_92] = 81920, 105 + [IB_RNR_TIMER_122_88] = 122880, 106 + [IB_RNR_TIMER_163_84] = 163840, 107 + [IB_RNR_TIMER_245_76] = 245760, 108 + [IB_RNR_TIMER_327_68] = 327680, 109 + [IB_RNR_TIMER_491_52] = 491520, 110 + }; 111 + 112 + static inline unsigned long rnrnak_jiffies(u8 timeout) 113 + { 114 + return max_t(unsigned long, 115 + usecs_to_jiffies(rnrnak_usec[timeout]), 1); 116 + } 117 + 118 + static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) 119 + { 120 + switch (opcode) { 121 + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; 122 + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; 123 + case IB_WR_SEND: return IB_WC_SEND; 124 + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; 125 + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; 126 + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; 127 + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; 128 + case IB_WR_LSO: return IB_WC_LSO; 129 + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; 130 + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; 131 + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; 132 + case IB_WR_REG_MR: return IB_WC_REG_MR; 133 + 134 + default: 135 + return 0xff; 136 + } 137 + } 138 + 139 + void retransmit_timer(unsigned long data) 140 + { 141 + struct rxe_qp *qp = (struct rxe_qp *)data; 142 + 143 + if (qp->valid) { 144 + qp->comp.timeout = 1; 145 + rxe_run_task(&qp->comp.task, 1); 146 + } 147 + } 148 + 149 + void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, 150 + struct sk_buff *skb) 151 + { 152 + int must_sched; 153 + 154 + skb_queue_tail(&qp->resp_pkts, skb); 155 + 156 + must_sched = skb_queue_len(&qp->resp_pkts) > 1; 157 + rxe_run_task(&qp->comp.task, must_sched); 158 + } 159 + 160 + static inline enum comp_state get_wqe(struct rxe_qp *qp, 161 + struct rxe_pkt_info *pkt, 162 + struct 
rxe_send_wqe **wqe_p)
{
	struct rxe_send_wqe *wqe;

	/* we come here whether or not we found a response packet to see if
	 * there are any posted WQEs
	 */
	wqe = queue_head(qp->sq.queue);
	*wqe_p = wqe;

	/* no WQE or requester has not started it yet */
	if (!wqe || wqe->state == wqe_state_posted)
		return pkt ? COMPST_DONE : COMPST_EXIT;

	/* WQE does not require an ack */
	if (wqe->state == wqe_state_done)
		return COMPST_COMP_WQE;

	/* WQE caused an error */
	if (wqe->state == wqe_state_error)
		return COMPST_ERROR;

	/* we have a WQE, if we also have an ack check its PSN */
	return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;
}

/* reload the per-QP retry budgets from the QP attributes */
static inline void reset_retry_counters(struct rxe_qp *qp)
{
	qp->comp.retry_cnt = qp->attr.retry_cnt;
	qp->comp.rnr_retry = qp->attr.rnr_retry;
}

/* classify the response packet's PSN against the oldest WQE and the
 * expected PSN, returning the next completer state
 */
static inline enum comp_state check_psn(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					struct rxe_send_wqe *wqe)
{
	s32 diff;

	/* check to see if response is past the oldest WQE. if it is, complete
	 * send/write or error read/atomic
	 */
	diff = psn_compare(pkt->psn, wqe->last_psn);
	if (diff > 0) {
		if (wqe->state == wqe_state_pending) {
			if (wqe->mask & WR_ATOMIC_OR_READ_MASK)
				return COMPST_ERROR_RETRY;

			reset_retry_counters(qp);
			return COMPST_COMP_WQE;
		} else {
			return COMPST_DONE;
		}
	}

	/* compare response packet to expected response */
	diff = psn_compare(pkt->psn, qp->comp.psn);
	if (diff < 0) {
		/* response is most likely a retried packet: if it matches an
		 * uncompleted WQE go complete it, else ignore it
		 */
		if (pkt->psn == wqe->last_psn)
			return COMPST_COMP_ACK;
		else
			return COMPST_DONE;
	} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
		return COMPST_ERROR_RETRY;
	} else {
		return COMPST_CHECK_ACK;
	}
}

/* validate the response opcode sequence and AETH contents against the
 * previous response opcode and the WQE; returns the next completer state
 */
static inline enum comp_state check_ack(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					struct rxe_send_wqe *wqe)
{
	unsigned int mask = pkt->mask;
	u8 syn;

	/* Check the sequence only */
	switch (qp->comp.opcode) {
	case -1:
		/* Will catch all *_ONLY cases. */
		if (!(mask & RXE_START_MASK))
			return COMPST_ERROR;

		break;

	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
		if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
		    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
			return COMPST_ERROR;
		}
		break;
	default:
		WARN_ON(1);
	}

	/* Check operation validity. */
	switch (pkt->opcode) {
	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
	case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
	case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
		syn = aeth_syn(pkt);

		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
			return COMPST_ERROR;

		/* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
		 * doesn't have an AETH)
		 */
	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
			return COMPST_ERROR;
		}
		reset_retry_counters(qp);
		return COMPST_READ;

	case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
		syn = aeth_syn(pkt);

		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
			return COMPST_ERROR;

		if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&
		    wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)
			return COMPST_ERROR;
		reset_retry_counters(qp);
		return COMPST_ATOMIC;

	case IB_OPCODE_RC_ACKNOWLEDGE:
		syn = aeth_syn(pkt);
		switch (syn & AETH_TYPE_MASK) {
		case AETH_ACK:
			reset_retry_counters(qp);
			return COMPST_WRITE_SEND;

		case AETH_RNR_NAK:
			return COMPST_RNR_RETRY;

		case AETH_NAK:
			switch (syn) {
			case AETH_NAK_PSN_SEQ_ERROR:
				/* a nak implicitly acks all packets with psns
				 * before
				 */
				if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
					qp->comp.psn = pkt->psn;
					if (qp->req.wait_psn) {
						qp->req.wait_psn = 0;
						rxe_run_task(&qp->req.task, 1);
					}
				}
				return COMPST_ERROR_RETRY;

			case AETH_NAK_INVALID_REQ:
				wqe->status = IB_WC_REM_INV_REQ_ERR;
				return COMPST_ERROR;

			case AETH_NAK_REM_ACC_ERR:
				wqe->status = IB_WC_REM_ACCESS_ERR;
				return COMPST_ERROR;

			case AETH_NAK_REM_OP_ERR:
				wqe->status = IB_WC_REM_OP_ERR;
				return COMPST_ERROR;

			default:
				pr_warn("unexpected nak %x\n", syn);
				wqe->status = IB_WC_REM_OP_ERR;
				return COMPST_ERROR;
			}

		default:
			return COMPST_ERROR;
		}
		break;

	default:
		pr_warn("unexpected opcode\n");
	}

	return COMPST_ERROR;
}

/* copy read-response payload into the WQE's local buffers */
static inline enum comp_state do_read(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt,
				      struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int ret;

	ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
			&wqe->dma, payload_addr(pkt),
			payload_size(pkt), to_mem_obj, NULL);
	if (ret)
		return COMPST_ERROR;

	/* complete only when all data arrived and this is the last packet */
	if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
		return COMPST_COMP_ACK;
	else
		return COMPST_UPDATE_COMP;
}

/* copy the original value returned by an atomic op into the WQE's buffer */
static inline enum comp_state do_atomic(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int ret;

	u64 atomic_orig = atmack_orig(pkt);

	ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
			&wqe->dma, &atomic_orig,
			sizeof(u64), to_mem_obj, NULL);
	if (ret)
		return COMPST_ERROR;
	else
		return COMPST_COMP_ACK;
}

/* build a send-side CQE for the WQE, in kernel (ib_wc) or user
 * (ib_uverbs_wc) format depending on who owns the QP
 */
static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
			  struct rxe_cqe *cqe)
{
	memset(cqe, 0, sizeof(*cqe));

	if (!qp->is_user) {
		struct ib_wc		*wc	= &cqe->ibwc;

		wc->wr_id		= wqe->wr.wr_id;
		wc->status		= wqe->status;
		wc->opcode		= wr_to_wc_opcode(wqe->wr.opcode);
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
		    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
			wc->wc_flags = IB_WC_WITH_IMM;
		wc->byte_len		= wqe->dma.length;
		wc->qp			= &qp->ibqp;
	} else {
		struct ib_uverbs_wc	*uwc	= &cqe->uibwc;

		uwc->wr_id		= wqe->wr.wr_id;
		uwc->status		= wqe->status;
		uwc->opcode		= wr_to_wc_opcode(wqe->wr.opcode);
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
		    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
			uwc->wc_flags = IB_WC_WITH_IMM;
		uwc->byte_len		= wqe->dma.length;
		uwc->qp_num		= qp->ibqp.qp_num;
	}
}

/* retire the WQE from the send queue, posting a CQE when required by the
 * QP's signaling mode, the WQE's flags, or an error state
 */
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_cqe cqe;

	if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
	    (qp->req.state == QP_STATE_ERROR)) {
		make_send_cqe(qp, wqe, &cqe);
		rxe_cq_post(qp->scq, &cqe, 0);
	}

	advance_consumer(qp->sq.queue);

	/*
	 * we completed something so let req run again
	 * if it is trying to fence
	 */
	if (qp->req.wait_fence) {
		qp->req.wait_fence = 0;
		rxe_run_task(&qp->req.task, 1);
	}
}

/* complete a WQE acknowledged by the response packet: release its
 * rd_atomic credit, handle SQ drain transitions and retire the WQE
 */
static inline enum comp_state complete_ack(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt,
					   struct rxe_send_wqe *wqe)
{
	unsigned long flags;

	if (wqe->has_rd_atomic) {
		wqe->has_rd_atomic = 0;
		atomic_inc(&qp->req.rd_atomic);
		if (qp->req.need_rd_atomic) {
			qp->comp.timeout_retry = 0;
			qp->req.need_rd_atomic = 0;
			rxe_run_task(&qp->req.task, 1);
		}
	}

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* state_lock used by requester & completer */
		spin_lock_irqsave(&qp->state_lock, flags);
		if ((qp->req.state == QP_STATE_DRAIN) &&
		    (qp->comp.psn == qp->req.psn)) {
			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} else {
			spin_unlock_irqrestore(&qp->state_lock, flags);
		}
	}

	do_complete(qp, wqe);

	if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
		return COMPST_UPDATE_COMP;
	else
		return COMPST_DONE;
}

/* complete a WQE that needs no (further) ack and advance comp.psn past
 * the packet if one was supplied
 */
static inline enum comp_state complete_wqe(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt,
					   struct rxe_send_wqe *wqe)
{
	qp->comp.opcode = -1;

	if (pkt) {
		if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
			qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;

		if (qp->req.wait_psn) {
			qp->req.wait_psn = 0;
			rxe_run_task(&qp->req.task, 1);
		}
	}

	do_complete(qp, wqe);

	return COMPST_GET_WQE;
}

/* completer task body: drain response packets and drive the comp_state
 * machine; returns 0 to be rescheduled or nonzero to stop
 */
int rxe_completer(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	/* NOTE(review): self-initialization silences an uninitialized-use
	 * warning; wqe is set by get_wqe() before real use - confirm no
	 * path reads it earlier
	 */
	struct rxe_send_wqe *wqe = wqe;
	struct sk_buff *skb = NULL;
	struct rxe_pkt_info *pkt = NULL;
	enum comp_state state;

	if (!qp->valid) {
		/* QP is gone: drop queued responses and flush the SQ */
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while (queue_head(qp->sq.queue))
			advance_consumer(qp->sq.queue);

		goto exit;
	}

	if (qp->req.state == QP_STATE_ERROR) {
		/* error state: drop responses, complete all WQEs in error */
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while ((wqe = queue_head(qp->sq.queue))) {
			wqe->status = IB_WC_WR_FLUSH_ERR;
			do_complete(qp, wqe);
		}

		goto exit;
	}

	if (qp->req.state == QP_STATE_RESET) {
		/* reset: drop responses and silently discard the SQ */
		while ((skb = skb_dequeue(&qp->resp_pkts))) {
			rxe_drop_ref(qp);
			kfree_skb(skb);
		}
		skb = NULL;
		pkt = NULL;

		while (queue_head(qp->sq.queue))
			advance_consumer(qp->sq.queue);

		goto exit;
	}

	if (qp->comp.timeout) {
		qp->comp.timeout_retry = 1;
		qp->comp.timeout = 0;
	} else {
qp->comp.timeout_retry = 0; 562 + } 563 + 564 + if (qp->req.need_retry) 565 + goto exit; 566 + 567 + state = COMPST_GET_ACK; 568 + 569 + while (1) { 570 + pr_debug("state = %s\n", comp_state_name[state]); 571 + switch (state) { 572 + case COMPST_GET_ACK: 573 + skb = skb_dequeue(&qp->resp_pkts); 574 + if (skb) { 575 + pkt = SKB_TO_PKT(skb); 576 + qp->comp.timeout_retry = 0; 577 + } 578 + state = COMPST_GET_WQE; 579 + break; 580 + 581 + case COMPST_GET_WQE: 582 + state = get_wqe(qp, pkt, &wqe); 583 + break; 584 + 585 + case COMPST_CHECK_PSN: 586 + state = check_psn(qp, pkt, wqe); 587 + break; 588 + 589 + case COMPST_CHECK_ACK: 590 + state = check_ack(qp, pkt, wqe); 591 + break; 592 + 593 + case COMPST_READ: 594 + state = do_read(qp, pkt, wqe); 595 + break; 596 + 597 + case COMPST_ATOMIC: 598 + state = do_atomic(qp, pkt, wqe); 599 + break; 600 + 601 + case COMPST_WRITE_SEND: 602 + if (wqe->state == wqe_state_pending && 603 + wqe->last_psn == pkt->psn) 604 + state = COMPST_COMP_ACK; 605 + else 606 + state = COMPST_UPDATE_COMP; 607 + break; 608 + 609 + case COMPST_COMP_ACK: 610 + state = complete_ack(qp, pkt, wqe); 611 + break; 612 + 613 + case COMPST_COMP_WQE: 614 + state = complete_wqe(qp, pkt, wqe); 615 + break; 616 + 617 + case COMPST_UPDATE_COMP: 618 + if (pkt->mask & RXE_END_MASK) 619 + qp->comp.opcode = -1; 620 + else 621 + qp->comp.opcode = pkt->opcode; 622 + 623 + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) 624 + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 625 + 626 + if (qp->req.wait_psn) { 627 + qp->req.wait_psn = 0; 628 + rxe_run_task(&qp->req.task, 1); 629 + } 630 + 631 + state = COMPST_DONE; 632 + break; 633 + 634 + case COMPST_DONE: 635 + if (pkt) { 636 + rxe_drop_ref(pkt->qp); 637 + kfree_skb(skb); 638 + } 639 + goto done; 640 + 641 + case COMPST_EXIT: 642 + if (qp->comp.timeout_retry && wqe) { 643 + state = COMPST_ERROR_RETRY; 644 + break; 645 + } 646 + 647 + /* re reset the timeout counter if 648 + * (1) QP is type RC 649 + * (2) the QP is alive 
650 + * (3) there is a packet sent by the requester that 651 + * might be acked (we still might get spurious 652 + * timeouts but try to keep them as few as possible) 653 + * (4) the timeout parameter is set 654 + */ 655 + if ((qp_type(qp) == IB_QPT_RC) && 656 + (qp->req.state == QP_STATE_READY) && 657 + (psn_compare(qp->req.psn, qp->comp.psn) > 0) && 658 + qp->qp_timeout_jiffies) 659 + mod_timer(&qp->retrans_timer, 660 + jiffies + qp->qp_timeout_jiffies); 661 + goto exit; 662 + 663 + case COMPST_ERROR_RETRY: 664 + /* we come here if the retry timer fired and we did 665 + * not receive a response packet. try to retry the send 666 + * queue if that makes sense and the limits have not 667 + * been exceeded. remember that some timeouts are 668 + * spurious since we do not reset the timer but kick 669 + * it down the road or let it expire 670 + */ 671 + 672 + /* there is nothing to retry in this case */ 673 + if (!wqe || (wqe->state == wqe_state_posted)) 674 + goto exit; 675 + 676 + if (qp->comp.retry_cnt > 0) { 677 + if (qp->comp.retry_cnt != 7) 678 + qp->comp.retry_cnt--; 679 + 680 + /* no point in retrying if we have already 681 + * seen the last ack that the requester could 682 + * have caused 683 + */ 684 + if (psn_compare(qp->req.psn, 685 + qp->comp.psn) > 0) { 686 + /* tell the requester to retry the 687 + * send send queue next time around 688 + */ 689 + qp->req.need_retry = 1; 690 + rxe_run_task(&qp->req.task, 1); 691 + } 692 + goto exit; 693 + } else { 694 + wqe->status = IB_WC_RETRY_EXC_ERR; 695 + state = COMPST_ERROR; 696 + } 697 + break; 698 + 699 + case COMPST_RNR_RETRY: 700 + if (qp->comp.rnr_retry > 0) { 701 + if (qp->comp.rnr_retry != 7) 702 + qp->comp.rnr_retry--; 703 + 704 + qp->req.need_retry = 1; 705 + pr_debug("set rnr nak timer\n"); 706 + mod_timer(&qp->rnr_nak_timer, 707 + jiffies + rnrnak_jiffies(aeth_syn(pkt) 708 + & ~AETH_TYPE_MASK)); 709 + goto exit; 710 + } else { 711 + wqe->status = IB_WC_RNR_RETRY_EXC_ERR; 712 + state = COMPST_ERROR; 713 
+ } 714 + break; 715 + 716 + case COMPST_ERROR: 717 + do_complete(qp, wqe); 718 + rxe_qp_error(qp); 719 + goto exit; 720 + } 721 + } 722 + 723 + exit: 724 + /* we come here if we are done with processing and want the task to 725 + * exit from the loop calling us 726 + */ 727 + return -EAGAIN; 728 + 729 + done: 730 + /* we come here if we have processed a packet we want the task to call 731 + * us again to see if there is anything else to do 732 + */ 733 + return 0; 734 + }
+165
drivers/infiniband/sw/rxe/rxe_cq.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + #include "rxe_queue.h" 37 + 38 + int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, 39 + int cqe, int comp_vector, struct ib_udata *udata) 40 + { 41 + int count; 42 + 43 + if (cqe <= 0) { 44 + pr_warn("cqe(%d) <= 0\n", cqe); 45 + goto err1; 46 + } 47 + 48 + if (cqe > rxe->attr.max_cqe) { 49 + pr_warn("cqe(%d) > max_cqe(%d)\n", 50 + cqe, rxe->attr.max_cqe); 51 + goto err1; 52 + } 53 + 54 + if (cq) { 55 + count = queue_count(cq->queue); 56 + if (cqe < count) { 57 + pr_warn("cqe(%d) < current # elements in queue (%d)", 58 + cqe, count); 59 + goto err1; 60 + } 61 + } 62 + 63 + return 0; 64 + 65 + err1: 66 + return -EINVAL; 67 + } 68 + 69 + static void rxe_send_complete(unsigned long data) 70 + { 71 + struct rxe_cq *cq = (struct rxe_cq *)data; 72 + 73 + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 74 + } 75 + 76 + int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, 77 + int comp_vector, struct ib_ucontext *context, 78 + struct ib_udata *udata) 79 + { 80 + int err; 81 + 82 + cq->queue = rxe_queue_init(rxe, &cqe, 83 + sizeof(struct rxe_cqe)); 84 + if (!cq->queue) { 85 + pr_warn("unable to create cq\n"); 86 + return -ENOMEM; 87 + } 88 + 89 + err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, 90 + cq->queue->buf_size, &cq->queue->ip); 91 + if (err) { 92 + kvfree(cq->queue->buf); 93 + kfree(cq->queue); 94 + return err; 95 + } 96 + 97 + if (udata) 98 + cq->is_user = 1; 99 + 100 + tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); 101 + 102 + spin_lock_init(&cq->cq_lock); 103 + cq->ibcq.cqe = cqe; 104 + return 0; 105 + } 106 + 107 + int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata) 108 + { 109 + int err; 110 + 111 + err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, 112 + sizeof(struct rxe_cqe), 113 + cq->queue->ip ? 
cq->queue->ip->context : NULL, 114 + udata, NULL, &cq->cq_lock); 115 + if (!err) 116 + cq->ibcq.cqe = cqe; 117 + 118 + return err; 119 + } 120 + 121 + int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) 122 + { 123 + struct ib_event ev; 124 + unsigned long flags; 125 + 126 + spin_lock_irqsave(&cq->cq_lock, flags); 127 + 128 + if (unlikely(queue_full(cq->queue))) { 129 + spin_unlock_irqrestore(&cq->cq_lock, flags); 130 + if (cq->ibcq.event_handler) { 131 + ev.device = cq->ibcq.device; 132 + ev.element.cq = &cq->ibcq; 133 + ev.event = IB_EVENT_CQ_ERR; 134 + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); 135 + } 136 + 137 + return -EBUSY; 138 + } 139 + 140 + memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); 141 + 142 + /* make sure all changes to the CQ are written before we update the 143 + * producer pointer 144 + */ 145 + smp_wmb(); 146 + 147 + advance_producer(cq->queue); 148 + spin_unlock_irqrestore(&cq->cq_lock, flags); 149 + 150 + if ((cq->notify == IB_CQ_NEXT_COMP) || 151 + (cq->notify == IB_CQ_SOLICITED && solicited)) { 152 + cq->notify = 0; 153 + tasklet_schedule(&cq->comp_task); 154 + } 155 + 156 + return 0; 157 + } 158 + 159 + void rxe_cq_cleanup(void *arg) 160 + { 161 + struct rxe_cq *cq = arg; 162 + 163 + if (cq->queue) 164 + rxe_queue_cleanup(cq->queue); 165 + }
+166
drivers/infiniband/sw/rxe/rxe_dma.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + #define DMA_BAD_ADDER ((u64)0) 38 + 39 + static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr) 40 + { 41 + return dma_addr == DMA_BAD_ADDER; 42 + } 43 + 44 + static u64 rxe_dma_map_single(struct ib_device *dev, 45 + void *cpu_addr, size_t size, 46 + enum dma_data_direction direction) 47 + { 48 + WARN_ON(!valid_dma_direction(direction)); 49 + return (uintptr_t)cpu_addr; 50 + } 51 + 52 + static void rxe_dma_unmap_single(struct ib_device *dev, 53 + u64 addr, size_t size, 54 + enum dma_data_direction direction) 55 + { 56 + WARN_ON(!valid_dma_direction(direction)); 57 + } 58 + 59 + static u64 rxe_dma_map_page(struct ib_device *dev, 60 + struct page *page, 61 + unsigned long offset, 62 + size_t size, enum dma_data_direction direction) 63 + { 64 + u64 addr; 65 + 66 + WARN_ON(!valid_dma_direction(direction)); 67 + 68 + if (offset + size > PAGE_SIZE) { 69 + addr = DMA_BAD_ADDER; 70 + goto done; 71 + } 72 + 73 + addr = (uintptr_t)page_address(page); 74 + if (addr) 75 + addr += offset; 76 + 77 + done: 78 + return addr; 79 + } 80 + 81 + static void rxe_dma_unmap_page(struct ib_device *dev, 82 + u64 addr, size_t size, 83 + enum dma_data_direction direction) 84 + { 85 + WARN_ON(!valid_dma_direction(direction)); 86 + } 87 + 88 + static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl, 89 + int nents, enum dma_data_direction direction) 90 + { 91 + struct scatterlist *sg; 92 + u64 addr; 93 + int i; 94 + int ret = nents; 95 + 96 + WARN_ON(!valid_dma_direction(direction)); 97 + 98 + for_each_sg(sgl, sg, nents, i) { 99 + addr = (uintptr_t)page_address(sg_page(sg)); 100 + if (!addr) { 101 + ret = 0; 102 + break; 103 + } 104 + sg->dma_address = addr + sg->offset; 105 + #ifdef CONFIG_NEED_SG_DMA_LENGTH 106 + sg->dma_length = sg->length; 107 + #endif 108 + } 109 + 110 + return ret; 111 + } 112 + 113 + static void rxe_unmap_sg(struct ib_device *dev, 114 + struct scatterlist *sg, int nents, 115 + 
enum dma_data_direction direction) 116 + { 117 + WARN_ON(!valid_dma_direction(direction)); 118 + } 119 + 120 + static void rxe_sync_single_for_cpu(struct ib_device *dev, 121 + u64 addr, 122 + size_t size, enum dma_data_direction dir) 123 + { 124 + } 125 + 126 + static void rxe_sync_single_for_device(struct ib_device *dev, 127 + u64 addr, 128 + size_t size, enum dma_data_direction dir) 129 + { 130 + } 131 + 132 + static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size, 133 + u64 *dma_handle, gfp_t flag) 134 + { 135 + struct page *p; 136 + void *addr = NULL; 137 + 138 + p = alloc_pages(flag, get_order(size)); 139 + if (p) 140 + addr = page_address(p); 141 + 142 + if (dma_handle) 143 + *dma_handle = (uintptr_t)addr; 144 + 145 + return addr; 146 + } 147 + 148 + static void rxe_dma_free_coherent(struct ib_device *dev, size_t size, 149 + void *cpu_addr, u64 dma_handle) 150 + { 151 + free_pages((unsigned long)cpu_addr, get_order(size)); 152 + } 153 + 154 + struct ib_dma_mapping_ops rxe_dma_mapping_ops = { 155 + .mapping_error = rxe_mapping_error, 156 + .map_single = rxe_dma_map_single, 157 + .unmap_single = rxe_dma_unmap_single, 158 + .map_page = rxe_dma_map_page, 159 + .unmap_page = rxe_dma_unmap_page, 160 + .map_sg = rxe_map_sg, 161 + .unmap_sg = rxe_unmap_sg, 162 + .sync_single_for_cpu = rxe_sync_single_for_cpu, 163 + .sync_single_for_device = rxe_sync_single_for_device, 164 + .alloc_coherent = rxe_dma_alloc_coherent, 165 + .free_coherent = rxe_dma_free_coherent 166 + };
+952
drivers/infiniband/sw/rxe/rxe_hdr.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + */ 33 + 34 + #ifndef RXE_HDR_H 35 + #define RXE_HDR_H 36 + 37 + /* extracted information about a packet carried in an sk_buff struct fits in 38 + * the skbuff cb array. Must be at most 48 bytes. stored in control block of 39 + * sk_buff for received packets. 
40 + */ 41 + struct rxe_pkt_info { 42 + struct rxe_dev *rxe; /* device that owns packet */ 43 + struct rxe_qp *qp; /* qp that owns packet */ 44 + struct rxe_send_wqe *wqe; /* send wqe */ 45 + u8 *hdr; /* points to bth */ 46 + u32 mask; /* useful info about pkt */ 47 + u32 psn; /* bth psn of packet */ 48 + u16 pkey_index; /* partition of pkt */ 49 + u16 paylen; /* length of bth - icrc */ 50 + u8 port_num; /* port pkt received on */ 51 + u8 opcode; /* bth opcode of packet */ 52 + u8 offset; /* bth offset from pkt->hdr */ 53 + }; 54 + 55 + /* Macros should be used only for received skb */ 56 + #define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb) 57 + #define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb) 58 + 59 + /* 60 + * IBA header types and methods 61 + * 62 + * Some of these are for reference and completeness only since 63 + * rxe does not currently support RD transport 64 + * most of this could be moved into IB core. ib_pack.h has 65 + * part of this but is incomplete 66 + * 67 + * Header specific routines to insert/extract values to/from headers 68 + * the routines that are named __hhh_(set_)fff() take a pointer to a 69 + * hhh header and get(set) the fff field. The routines named 70 + * hhh_(set_)fff take a packet info struct and find the 71 + * header and field based on the opcode in the packet. 72 + * Conversion to/from network byte order from cpu order is also done. 
73 + */ 74 + 75 + #define RXE_ICRC_SIZE (4) 76 + #define RXE_MAX_HDR_LENGTH (80) 77 + 78 + /****************************************************************************** 79 + * Base Transport Header 80 + ******************************************************************************/ 81 + struct rxe_bth { 82 + u8 opcode; 83 + u8 flags; 84 + __be16 pkey; 85 + __be32 qpn; 86 + __be32 apsn; 87 + }; 88 + 89 + #define BTH_TVER (0) 90 + #define BTH_DEF_PKEY (0xffff) 91 + 92 + #define BTH_SE_MASK (0x80) 93 + #define BTH_MIG_MASK (0x40) 94 + #define BTH_PAD_MASK (0x30) 95 + #define BTH_TVER_MASK (0x0f) 96 + #define BTH_FECN_MASK (0x80000000) 97 + #define BTH_BECN_MASK (0x40000000) 98 + #define BTH_RESV6A_MASK (0x3f000000) 99 + #define BTH_QPN_MASK (0x00ffffff) 100 + #define BTH_ACK_MASK (0x80000000) 101 + #define BTH_RESV7_MASK (0x7f000000) 102 + #define BTH_PSN_MASK (0x00ffffff) 103 + 104 + static inline u8 __bth_opcode(void *arg) 105 + { 106 + struct rxe_bth *bth = arg; 107 + 108 + return bth->opcode; 109 + } 110 + 111 + static inline void __bth_set_opcode(void *arg, u8 opcode) 112 + { 113 + struct rxe_bth *bth = arg; 114 + 115 + bth->opcode = opcode; 116 + } 117 + 118 + static inline u8 __bth_se(void *arg) 119 + { 120 + struct rxe_bth *bth = arg; 121 + 122 + return 0 != (BTH_SE_MASK & bth->flags); 123 + } 124 + 125 + static inline void __bth_set_se(void *arg, int se) 126 + { 127 + struct rxe_bth *bth = arg; 128 + 129 + if (se) 130 + bth->flags |= BTH_SE_MASK; 131 + else 132 + bth->flags &= ~BTH_SE_MASK; 133 + } 134 + 135 + static inline u8 __bth_mig(void *arg) 136 + { 137 + struct rxe_bth *bth = arg; 138 + 139 + return 0 != (BTH_MIG_MASK & bth->flags); 140 + } 141 + 142 + static inline void __bth_set_mig(void *arg, u8 mig) 143 + { 144 + struct rxe_bth *bth = arg; 145 + 146 + if (mig) 147 + bth->flags |= BTH_MIG_MASK; 148 + else 149 + bth->flags &= ~BTH_MIG_MASK; 150 + } 151 + 152 + static inline u8 __bth_pad(void *arg) 153 + { 154 + struct rxe_bth *bth = arg; 155 + 156 
+ return (BTH_PAD_MASK & bth->flags) >> 4; 157 + } 158 + 159 + static inline void __bth_set_pad(void *arg, u8 pad) 160 + { 161 + struct rxe_bth *bth = arg; 162 + 163 + bth->flags = (BTH_PAD_MASK & (pad << 4)) | 164 + (~BTH_PAD_MASK & bth->flags); 165 + } 166 + 167 + static inline u8 __bth_tver(void *arg) 168 + { 169 + struct rxe_bth *bth = arg; 170 + 171 + return BTH_TVER_MASK & bth->flags; 172 + } 173 + 174 + static inline void __bth_set_tver(void *arg, u8 tver) 175 + { 176 + struct rxe_bth *bth = arg; 177 + 178 + bth->flags = (BTH_TVER_MASK & tver) | 179 + (~BTH_TVER_MASK & bth->flags); 180 + } 181 + 182 + static inline u16 __bth_pkey(void *arg) 183 + { 184 + struct rxe_bth *bth = arg; 185 + 186 + return be16_to_cpu(bth->pkey); 187 + } 188 + 189 + static inline void __bth_set_pkey(void *arg, u16 pkey) 190 + { 191 + struct rxe_bth *bth = arg; 192 + 193 + bth->pkey = cpu_to_be16(pkey); 194 + } 195 + 196 + static inline u32 __bth_qpn(void *arg) 197 + { 198 + struct rxe_bth *bth = arg; 199 + 200 + return BTH_QPN_MASK & be32_to_cpu(bth->qpn); 201 + } 202 + 203 + static inline void __bth_set_qpn(void *arg, u32 qpn) 204 + { 205 + struct rxe_bth *bth = arg; 206 + u32 resvqpn = be32_to_cpu(bth->qpn); 207 + 208 + bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) | 209 + (~BTH_QPN_MASK & resvqpn)); 210 + } 211 + 212 + static inline int __bth_fecn(void *arg) 213 + { 214 + struct rxe_bth *bth = arg; 215 + 216 + return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn); 217 + } 218 + 219 + static inline void __bth_set_fecn(void *arg, int fecn) 220 + { 221 + struct rxe_bth *bth = arg; 222 + 223 + if (fecn) 224 + bth->qpn |= cpu_to_be32(BTH_FECN_MASK); 225 + else 226 + bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK); 227 + } 228 + 229 + static inline int __bth_becn(void *arg) 230 + { 231 + struct rxe_bth *bth = arg; 232 + 233 + return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn); 234 + } 235 + 236 + static inline void __bth_set_becn(void *arg, int becn) 237 + { 238 + struct rxe_bth *bth = arg; 239 
+ 240 + if (becn) 241 + bth->qpn |= cpu_to_be32(BTH_BECN_MASK); 242 + else 243 + bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK); 244 + } 245 + 246 + static inline u8 __bth_resv6a(void *arg) 247 + { 248 + struct rxe_bth *bth = arg; 249 + 250 + return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24; 251 + } 252 + 253 + static inline void __bth_set_resv6a(void *arg) 254 + { 255 + struct rxe_bth *bth = arg; 256 + 257 + bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); 258 + } 259 + 260 + static inline int __bth_ack(void *arg) 261 + { 262 + struct rxe_bth *bth = arg; 263 + 264 + return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn); 265 + } 266 + 267 + static inline void __bth_set_ack(void *arg, int ack) 268 + { 269 + struct rxe_bth *bth = arg; 270 + 271 + if (ack) 272 + bth->apsn |= cpu_to_be32(BTH_ACK_MASK); 273 + else 274 + bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK); 275 + } 276 + 277 + static inline void __bth_set_resv7(void *arg) 278 + { 279 + struct rxe_bth *bth = arg; 280 + 281 + bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK); 282 + } 283 + 284 + static inline u32 __bth_psn(void *arg) 285 + { 286 + struct rxe_bth *bth = arg; 287 + 288 + return BTH_PSN_MASK & be32_to_cpu(bth->apsn); 289 + } 290 + 291 + static inline void __bth_set_psn(void *arg, u32 psn) 292 + { 293 + struct rxe_bth *bth = arg; 294 + u32 apsn = be32_to_cpu(bth->apsn); 295 + 296 + bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) | 297 + (~BTH_PSN_MASK & apsn)); 298 + } 299 + 300 + static inline u8 bth_opcode(struct rxe_pkt_info *pkt) 301 + { 302 + return __bth_opcode(pkt->hdr + pkt->offset); 303 + } 304 + 305 + static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) 306 + { 307 + __bth_set_opcode(pkt->hdr + pkt->offset, opcode); 308 + } 309 + 310 + static inline u8 bth_se(struct rxe_pkt_info *pkt) 311 + { 312 + return __bth_se(pkt->hdr + pkt->offset); 313 + } 314 + 315 + static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) 316 + { 317 + __bth_set_se(pkt->hdr + pkt->offset, se); 318 + } 319 + 320 + 
static inline u8 bth_mig(struct rxe_pkt_info *pkt) 321 + { 322 + return __bth_mig(pkt->hdr + pkt->offset); 323 + } 324 + 325 + static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) 326 + { 327 + __bth_set_mig(pkt->hdr + pkt->offset, mig); 328 + } 329 + 330 + static inline u8 bth_pad(struct rxe_pkt_info *pkt) 331 + { 332 + return __bth_pad(pkt->hdr + pkt->offset); 333 + } 334 + 335 + static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) 336 + { 337 + __bth_set_pad(pkt->hdr + pkt->offset, pad); 338 + } 339 + 340 + static inline u8 bth_tver(struct rxe_pkt_info *pkt) 341 + { 342 + return __bth_tver(pkt->hdr + pkt->offset); 343 + } 344 + 345 + static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) 346 + { 347 + __bth_set_tver(pkt->hdr + pkt->offset, tver); 348 + } 349 + 350 + static inline u16 bth_pkey(struct rxe_pkt_info *pkt) 351 + { 352 + return __bth_pkey(pkt->hdr + pkt->offset); 353 + } 354 + 355 + static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) 356 + { 357 + __bth_set_pkey(pkt->hdr + pkt->offset, pkey); 358 + } 359 + 360 + static inline u32 bth_qpn(struct rxe_pkt_info *pkt) 361 + { 362 + return __bth_qpn(pkt->hdr + pkt->offset); 363 + } 364 + 365 + static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) 366 + { 367 + __bth_set_qpn(pkt->hdr + pkt->offset, qpn); 368 + } 369 + 370 + static inline int bth_fecn(struct rxe_pkt_info *pkt) 371 + { 372 + return __bth_fecn(pkt->hdr + pkt->offset); 373 + } 374 + 375 + static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) 376 + { 377 + __bth_set_fecn(pkt->hdr + pkt->offset, fecn); 378 + } 379 + 380 + static inline int bth_becn(struct rxe_pkt_info *pkt) 381 + { 382 + return __bth_becn(pkt->hdr + pkt->offset); 383 + } 384 + 385 + static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) 386 + { 387 + __bth_set_becn(pkt->hdr + pkt->offset, becn); 388 + } 389 + 390 + static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) 391 + { 392 + return 
__bth_resv6a(pkt->hdr + pkt->offset); 393 + } 394 + 395 + static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) 396 + { 397 + __bth_set_resv6a(pkt->hdr + pkt->offset); 398 + } 399 + 400 + static inline int bth_ack(struct rxe_pkt_info *pkt) 401 + { 402 + return __bth_ack(pkt->hdr + pkt->offset); 403 + } 404 + 405 + static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) 406 + { 407 + __bth_set_ack(pkt->hdr + pkt->offset, ack); 408 + } 409 + 410 + static inline void bth_set_resv7(struct rxe_pkt_info *pkt) 411 + { 412 + __bth_set_resv7(pkt->hdr + pkt->offset); 413 + } 414 + 415 + static inline u32 bth_psn(struct rxe_pkt_info *pkt) 416 + { 417 + return __bth_psn(pkt->hdr + pkt->offset); 418 + } 419 + 420 + static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) 421 + { 422 + __bth_set_psn(pkt->hdr + pkt->offset, psn); 423 + } 424 + 425 + static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, 426 + int mig, int pad, u16 pkey, u32 qpn, int ack_req, 427 + u32 psn) 428 + { 429 + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); 430 + 431 + bth->opcode = opcode; 432 + bth->flags = (pad << 4) & BTH_PAD_MASK; 433 + if (se) 434 + bth->flags |= BTH_SE_MASK; 435 + if (mig) 436 + bth->flags |= BTH_MIG_MASK; 437 + bth->pkey = cpu_to_be16(pkey); 438 + bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK); 439 + psn &= BTH_PSN_MASK; 440 + if (ack_req) 441 + psn |= BTH_ACK_MASK; 442 + bth->apsn = cpu_to_be32(psn); 443 + } 444 + 445 + /****************************************************************************** 446 + * Reliable Datagram Extended Transport Header 447 + ******************************************************************************/ 448 + struct rxe_rdeth { 449 + __be32 een; 450 + }; 451 + 452 + #define RDETH_EEN_MASK (0x00ffffff) 453 + 454 + static inline u8 __rdeth_een(void *arg) 455 + { 456 + struct rxe_rdeth *rdeth = arg; 457 + 458 + return RDETH_EEN_MASK & be32_to_cpu(rdeth->een); 459 + } 460 + 461 + static inline 
void __rdeth_set_een(void *arg, u32 een) 462 + { 463 + struct rxe_rdeth *rdeth = arg; 464 + 465 + rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een); 466 + } 467 + 468 + static inline u8 rdeth_een(struct rxe_pkt_info *pkt) 469 + { 470 + return __rdeth_een(pkt->hdr + pkt->offset 471 + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); 472 + } 473 + 474 + static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) 475 + { 476 + __rdeth_set_een(pkt->hdr + pkt->offset 477 + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); 478 + } 479 + 480 + /****************************************************************************** 481 + * Datagram Extended Transport Header 482 + ******************************************************************************/ 483 + struct rxe_deth { 484 + __be32 qkey; 485 + __be32 sqp; 486 + }; 487 + 488 + #define GSI_QKEY (0x80010000) 489 + #define DETH_SQP_MASK (0x00ffffff) 490 + 491 + static inline u32 __deth_qkey(void *arg) 492 + { 493 + struct rxe_deth *deth = arg; 494 + 495 + return be32_to_cpu(deth->qkey); 496 + } 497 + 498 + static inline void __deth_set_qkey(void *arg, u32 qkey) 499 + { 500 + struct rxe_deth *deth = arg; 501 + 502 + deth->qkey = cpu_to_be32(qkey); 503 + } 504 + 505 + static inline u32 __deth_sqp(void *arg) 506 + { 507 + struct rxe_deth *deth = arg; 508 + 509 + return DETH_SQP_MASK & be32_to_cpu(deth->sqp); 510 + } 511 + 512 + static inline void __deth_set_sqp(void *arg, u32 sqp) 513 + { 514 + struct rxe_deth *deth = arg; 515 + 516 + deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp); 517 + } 518 + 519 + static inline u32 deth_qkey(struct rxe_pkt_info *pkt) 520 + { 521 + return __deth_qkey(pkt->hdr + pkt->offset 522 + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); 523 + } 524 + 525 + static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) 526 + { 527 + __deth_set_qkey(pkt->hdr + pkt->offset 528 + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); 529 + } 530 + 531 + static inline u32 deth_sqp(struct rxe_pkt_info *pkt) 
532 + { 533 + return __deth_sqp(pkt->hdr + pkt->offset 534 + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); 535 + } 536 + 537 + static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) 538 + { 539 + __deth_set_sqp(pkt->hdr + pkt->offset 540 + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); 541 + } 542 + 543 + /****************************************************************************** 544 + * RDMA Extended Transport Header 545 + ******************************************************************************/ 546 + struct rxe_reth { 547 + __be64 va; 548 + __be32 rkey; 549 + __be32 len; 550 + }; 551 + 552 + static inline u64 __reth_va(void *arg) 553 + { 554 + struct rxe_reth *reth = arg; 555 + 556 + return be64_to_cpu(reth->va); 557 + } 558 + 559 + static inline void __reth_set_va(void *arg, u64 va) 560 + { 561 + struct rxe_reth *reth = arg; 562 + 563 + reth->va = cpu_to_be64(va); 564 + } 565 + 566 + static inline u32 __reth_rkey(void *arg) 567 + { 568 + struct rxe_reth *reth = arg; 569 + 570 + return be32_to_cpu(reth->rkey); 571 + } 572 + 573 + static inline void __reth_set_rkey(void *arg, u32 rkey) 574 + { 575 + struct rxe_reth *reth = arg; 576 + 577 + reth->rkey = cpu_to_be32(rkey); 578 + } 579 + 580 + static inline u32 __reth_len(void *arg) 581 + { 582 + struct rxe_reth *reth = arg; 583 + 584 + return be32_to_cpu(reth->len); 585 + } 586 + 587 + static inline void __reth_set_len(void *arg, u32 len) 588 + { 589 + struct rxe_reth *reth = arg; 590 + 591 + reth->len = cpu_to_be32(len); 592 + } 593 + 594 + static inline u64 reth_va(struct rxe_pkt_info *pkt) 595 + { 596 + return __reth_va(pkt->hdr + pkt->offset 597 + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); 598 + } 599 + 600 + static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) 601 + { 602 + __reth_set_va(pkt->hdr + pkt->offset 603 + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); 604 + } 605 + 606 + static inline u32 reth_rkey(struct rxe_pkt_info *pkt) 607 + { 608 + return 
__reth_rkey(pkt->hdr + pkt->offset 609 + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); 610 + } 611 + 612 + static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) 613 + { 614 + __reth_set_rkey(pkt->hdr + pkt->offset 615 + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); 616 + } 617 + 618 + static inline u32 reth_len(struct rxe_pkt_info *pkt) 619 + { 620 + return __reth_len(pkt->hdr + pkt->offset 621 + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); 622 + } 623 + 624 + static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) 625 + { 626 + __reth_set_len(pkt->hdr + pkt->offset 627 + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); 628 + } 629 + 630 + /****************************************************************************** 631 + * Atomic Extended Transport Header 632 + ******************************************************************************/ 633 + struct rxe_atmeth { 634 + __be64 va; 635 + __be32 rkey; 636 + __be64 swap_add; 637 + __be64 comp; 638 + } __attribute__((__packed__)); 639 + 640 + static inline u64 __atmeth_va(void *arg) 641 + { 642 + struct rxe_atmeth *atmeth = arg; 643 + 644 + return be64_to_cpu(atmeth->va); 645 + } 646 + 647 + static inline void __atmeth_set_va(void *arg, u64 va) 648 + { 649 + struct rxe_atmeth *atmeth = arg; 650 + 651 + atmeth->va = cpu_to_be64(va); 652 + } 653 + 654 + static inline u32 __atmeth_rkey(void *arg) 655 + { 656 + struct rxe_atmeth *atmeth = arg; 657 + 658 + return be32_to_cpu(atmeth->rkey); 659 + } 660 + 661 + static inline void __atmeth_set_rkey(void *arg, u32 rkey) 662 + { 663 + struct rxe_atmeth *atmeth = arg; 664 + 665 + atmeth->rkey = cpu_to_be32(rkey); 666 + } 667 + 668 + static inline u64 __atmeth_swap_add(void *arg) 669 + { 670 + struct rxe_atmeth *atmeth = arg; 671 + 672 + return be64_to_cpu(atmeth->swap_add); 673 + } 674 + 675 + static inline void __atmeth_set_swap_add(void *arg, u64 swap_add) 676 + { 677 + struct rxe_atmeth *atmeth = arg; 678 + 679 + atmeth->swap_add = 
cpu_to_be64(swap_add); 680 + } 681 + 682 + static inline u64 __atmeth_comp(void *arg) 683 + { 684 + struct rxe_atmeth *atmeth = arg; 685 + 686 + return be64_to_cpu(atmeth->comp); 687 + } 688 + 689 + static inline void __atmeth_set_comp(void *arg, u64 comp) 690 + { 691 + struct rxe_atmeth *atmeth = arg; 692 + 693 + atmeth->comp = cpu_to_be64(comp); 694 + } 695 + 696 + static inline u64 atmeth_va(struct rxe_pkt_info *pkt) 697 + { 698 + return __atmeth_va(pkt->hdr + pkt->offset 699 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); 700 + } 701 + 702 + static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) 703 + { 704 + __atmeth_set_va(pkt->hdr + pkt->offset 705 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); 706 + } 707 + 708 + static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) 709 + { 710 + return __atmeth_rkey(pkt->hdr + pkt->offset 711 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); 712 + } 713 + 714 + static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) 715 + { 716 + __atmeth_set_rkey(pkt->hdr + pkt->offset 717 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); 718 + } 719 + 720 + static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) 721 + { 722 + return __atmeth_swap_add(pkt->hdr + pkt->offset 723 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); 724 + } 725 + 726 + static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) 727 + { 728 + __atmeth_set_swap_add(pkt->hdr + pkt->offset 729 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); 730 + } 731 + 732 + static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) 733 + { 734 + return __atmeth_comp(pkt->hdr + pkt->offset 735 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); 736 + } 737 + 738 + static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) 739 + { 740 + __atmeth_set_comp(pkt->hdr + pkt->offset 741 + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); 742 + } 743 + 744 + 
/******************************************************************************
 * Ack Extended Transport Header
 ******************************************************************************/
struct rxe_aeth {
	__be32 smsn;	/* syndrome (high byte) + MSN (low 24 bits) */
};

#define AETH_SYN_MASK (0xff000000)
#define AETH_MSN_MASK (0x00ffffff)

/* AETH syndrome byte: the top three bits select the type (ACK, RNR NAK,
 * NAK, ...), the low five bits carry the credit count or NAK code.
 */
enum aeth_syndrome {
	AETH_TYPE_MASK		= 0xe0,
	AETH_ACK		= 0x00,
	AETH_RNR_NAK		= 0x20,
	AETH_RSVD		= 0x40,
	AETH_NAK		= 0x60,
	AETH_ACK_UNLIMITED	= 0x1f,
	AETH_NAK_PSN_SEQ_ERROR	= 0x60,
	AETH_NAK_INVALID_REQ	= 0x61,
	AETH_NAK_REM_ACC_ERR	= 0x62,
	AETH_NAK_REM_OP_ERR	= 0x63,
	AETH_NAK_INV_RD_REQ	= 0x64,
};

/* extract the 8-bit syndrome from the combined smsn word */
static inline u8 __aeth_syn(void *arg)
{
	struct rxe_aeth *aeth = arg;

	return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24;
}

/* read-modify-write: preserve the MSN bits while storing the syndrome */
static inline void __aeth_set_syn(void *arg, u8 syn)
{
	struct rxe_aeth *aeth = arg;
	u32 smsn = be32_to_cpu(aeth->smsn);

	aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) |
			 (~AETH_SYN_MASK & smsn));
}

static inline u32 __aeth_msn(void *arg)
{
	struct rxe_aeth *aeth = arg;

	return AETH_MSN_MASK & be32_to_cpu(aeth->smsn);
}

/* read-modify-write: preserve the syndrome bits while storing the MSN */
static inline void __aeth_set_msn(void *arg, u32 msn)
{
	struct rxe_aeth *aeth = arg;
	u32 smsn = be32_to_cpu(aeth->smsn);

	aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) |
			 (~AETH_MSN_MASK & smsn));
}

static inline u8 aeth_syn(struct rxe_pkt_info *pkt)
{
	return __aeth_syn(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_AETH]);
}

static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)
{
	__aeth_set_syn(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);
}

static inline u32 aeth_msn(struct rxe_pkt_info *pkt)
{
	return __aeth_msn(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_AETH]);
}

static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)
{
	__aeth_set_msn(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);
}

/******************************************************************************
 * Atomic Ack Extended Transport Header
 ******************************************************************************/
struct rxe_atmack {
	__be64 orig;	/* original value at the target before the atomic op */
};

static inline u64 __atmack_orig(void *arg)
{
	struct rxe_atmack *atmack = arg;

	return be64_to_cpu(atmack->orig);
}

static inline void __atmack_set_orig(void *arg, u64 orig)
{
	struct rxe_atmack *atmack = arg;

	atmack->orig = cpu_to_be64(orig);
}

static inline u64 atmack_orig(struct rxe_pkt_info *pkt)
{
	return __atmack_orig(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);
}

static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)
{
	__atmack_set_orig(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);
}

/******************************************************************************
 * Immediate Extended Transport Header
 ******************************************************************************/
struct rxe_immdt {
	__be32 imm;
};

/* immediate data is opaque to the transport: passed through as __be32
 * without byte-swapping
 */
static inline __be32 __immdt_imm(void *arg)
{
	struct rxe_immdt *immdt = arg;

	return immdt->imm;
}

static inline void __immdt_set_imm(void *arg, __be32 imm)
{
	struct rxe_immdt *immdt = arg;

	immdt->imm = imm;
}

static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)
{
	return __immdt_imm(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);
}

static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)
{
	__immdt_set_imm(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);
}

/******************************************************************************
 * Invalidate Extended Transport Header
 ******************************************************************************/
struct rxe_ieth {
	__be32 rkey;
};

static inline u32 __ieth_rkey(void *arg)
{
	struct rxe_ieth *ieth = arg;

	return be32_to_cpu(ieth->rkey);
}

static inline void __ieth_set_rkey(void *arg, u32 rkey)
{
	struct rxe_ieth *ieth = arg;

	ieth->rkey = cpu_to_be32(rkey);
}

static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)
{
	return __ieth_rkey(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_IETH]);
}

static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
	__ieth_set_rkey(pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
}

/* on-the-wire sizes of each extended transport header */
enum rxe_hdr_length {
	RXE_BTH_BYTES		= sizeof(struct rxe_bth),
	RXE_DETH_BYTES		= sizeof(struct rxe_deth),
	RXE_IMMDT_BYTES		= sizeof(struct rxe_immdt),
	RXE_RETH_BYTES		= sizeof(struct rxe_reth),
	RXE_AETH_BYTES		= sizeof(struct rxe_aeth),
	RXE_ATMACK_BYTES	= sizeof(struct rxe_atmack),
	RXE_ATMETH_BYTES	= sizeof(struct rxe_atmeth),
	RXE_IETH_BYTES		= sizeof(struct rxe_ieth),
	RXE_RDETH_BYTES		= sizeof(struct rxe_rdeth),
};

/* total length of all transport headers for this opcode */
static inline size_t header_size(struct rxe_pkt_info *pkt)
{
	return pkt->offset + rxe_opcode[pkt->opcode].length;
}

/* first byte of the payload, i.e. everything after the headers */
static inline void *payload_addr(struct rxe_pkt_info *pkt)
{
	return pkt->hdr + pkt->offset
		+ rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
}

/* payload length excluding the pad bytes and the trailing ICRC */
static inline size_t payload_size(struct rxe_pkt_info *pkt)
{
	return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]
		- bth_pad(pkt) - RXE_ICRC_SIZE;
}

#endif /* RXE_HDR_H */
+96
drivers/infiniband/sw/rxe/rxe_icrc.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + /* Compute a partial ICRC for all the IB transport headers. 
*/ 38 + u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) 39 + { 40 + unsigned int bth_offset = 0; 41 + struct iphdr *ip4h = NULL; 42 + struct ipv6hdr *ip6h = NULL; 43 + struct udphdr *udph; 44 + struct rxe_bth *bth; 45 + int crc; 46 + int length; 47 + int hdr_size = sizeof(struct udphdr) + 48 + (skb->protocol == htons(ETH_P_IP) ? 49 + sizeof(struct iphdr) : sizeof(struct ipv6hdr)); 50 + /* pseudo header buffer size is calculate using ipv6 header size since 51 + * it is bigger than ipv4 52 + */ 53 + u8 pshdr[sizeof(struct udphdr) + 54 + sizeof(struct ipv6hdr) + 55 + RXE_BTH_BYTES]; 56 + 57 + /* This seed is the result of computing a CRC with a seed of 58 + * 0xfffffff and 8 bytes of 0xff representing a masked LRH. 59 + */ 60 + crc = 0xdebb20e3; 61 + 62 + if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ 63 + memcpy(pshdr, ip_hdr(skb), hdr_size); 64 + ip4h = (struct iphdr *)pshdr; 65 + udph = (struct udphdr *)(ip4h + 1); 66 + 67 + ip4h->ttl = 0xff; 68 + ip4h->check = CSUM_MANGLED_0; 69 + ip4h->tos = 0xff; 70 + } else { /* IPv6 */ 71 + memcpy(pshdr, ipv6_hdr(skb), hdr_size); 72 + ip6h = (struct ipv6hdr *)pshdr; 73 + udph = (struct udphdr *)(ip6h + 1); 74 + 75 + memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl)); 76 + ip6h->priority = 0xf; 77 + ip6h->hop_limit = 0xff; 78 + } 79 + udph->check = CSUM_MANGLED_0; 80 + 81 + bth_offset += hdr_size; 82 + 83 + memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES); 84 + bth = (struct rxe_bth *)&pshdr[bth_offset]; 85 + 86 + /* exclude bth.resv8a */ 87 + bth->qpn |= cpu_to_be32(~BTH_QPN_MASK); 88 + 89 + length = hdr_size + RXE_BTH_BYTES; 90 + crc = crc32_le(crc, pshdr, length); 91 + 92 + /* And finish to compute the CRC on the remainder of the headers. */ 93 + crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES, 94 + rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES); 95 + return crc; 96 + }
+286
drivers/infiniband/sw/rxe/rxe_loc.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
 */

#ifndef RXE_LOC_H
#define RXE_LOC_H

/* rxe_av.c */

int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);

int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
		     struct rxe_av *av, struct ib_ah_attr *attr);

int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
		   struct ib_ah_attr *attr);

int rxe_av_fill_ip_info(struct rxe_dev *rxe,
			struct rxe_av *av,
			struct ib_ah_attr *attr,
			struct ib_gid_attr *sgid_attr,
			union ib_gid *sgid);

struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);

/* rxe_cq.c */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
		    int cqe, int comp_vector, struct ib_udata *udata);

int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
		     int comp_vector, struct ib_ucontext *context,
		     struct ib_udata *udata);

int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);

int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);

void rxe_cq_cleanup(void *arg);

/* rxe_mcast.c */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
		      struct rxe_mc_grp **grp_p);

int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
			   struct rxe_mc_grp *grp);

int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
			    union ib_gid *mgid);

void rxe_drop_all_mcast_groups(struct rxe_qp *qp);

void rxe_mc_cleanup(void *arg);

/* rxe_mmap.c */
/* One pending user-space mapping of a kernel queue buffer; lives on the
 * device's pending_mmaps list until the matching mmap() call arrives.
 */
struct rxe_mmap_info {
	struct list_head	pending_mmaps;
	struct ib_ucontext	*context;
	struct kref		ref;
	void			*obj;

	struct mminfo info;
};

void rxe_mmap_release(struct kref *ref);

struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
					   u32 size,
					   struct ib_ucontext *context,
					   void *obj);

int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);

/* rxe_mr.c */
enum copy_direction {
	to_mem_obj,
	from_mem_obj,
};

int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
		     int access, struct rxe_mem *mem);

int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mr);

int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem);

int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
		 int length, enum copy_direction dir, u32 *crcp);

int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
	      struct rxe_dma_info *dma, void *addr, int length,
	      enum copy_direction dir, u32 *crcp);

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length);

enum lookup_type {
	lookup_local,
	lookup_remote,
};

struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type);

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);

int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
		      u64 *page, int num_pages, u64 iova);

void rxe_mem_cleanup(void *arg);

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);

/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);

int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
		     struct ib_qp_init_attr *init, struct ib_udata *udata,
		     struct ib_pd *ibpd);

int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);

int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
		    struct ib_qp_attr *attr, int mask);

int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
		     int mask, struct ib_udata *udata);

int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);

void rxe_qp_error(struct rxe_qp *qp);

void rxe_qp_destroy(struct rxe_qp *qp);

void rxe_qp_cleanup(void *arg);

/* QP number as exposed through the ib_qp embedded in rxe_qp */
static inline int qp_num(struct rxe_qp *qp)
{
	return qp->ibqp.qp_num;
}

static inline enum ib_qp_type qp_type(struct rxe_qp *qp)
{
	return qp->ibqp.qp_type;
}

static inline enum ib_qp_state qp_state(struct rxe_qp *qp)
{
	return qp->attr.qp_state;
}

/* connected QPs use the negotiated path MTU; datagram QPs always use the
 * port maximum
 */
static inline int qp_mtu(struct rxe_qp *qp)
{
	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
		return qp->attr.path_mtu;
	else
		return RXE_PORT_MAX_MTU;
}

/* size of one receive WQE with room for max_sge scatter entries */
static inline int rcv_wqe_size(int max_sge)
{
	return sizeof(struct rxe_recv_wqe) +
		max_sge * sizeof(struct ib_sge);
}

void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);

/* advance the responder's read/atomic resource ring head, wrapping at
 * max_rd_atomic
 */
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
	qp->resp.res_head++;
	if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic))
		qp->resp.res_head = 0;
}

void retransmit_timer(unsigned long data);
void rnr_nak_timer(unsigned long data);

void dump_qp(struct rxe_qp *qp);

/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)

int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);

int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
		      struct ib_srq_init_attr *init,
		      struct ib_ucontext *context, struct ib_udata *udata);

int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
		      struct ib_udata *udata);
+ 224 + extern struct ib_dma_mapping_ops rxe_dma_mapping_ops; 225 + 226 + void rxe_release(struct kref *kref); 227 + 228 + int rxe_completer(void *arg); 229 + int rxe_requester(void *arg); 230 + int rxe_responder(void *arg); 231 + 232 + u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb); 233 + 234 + void rxe_resp_queue_pkt(struct rxe_dev *rxe, 235 + struct rxe_qp *qp, struct sk_buff *skb); 236 + 237 + void rxe_comp_queue_pkt(struct rxe_dev *rxe, 238 + struct rxe_qp *qp, struct sk_buff *skb); 239 + 240 + static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp) 241 + { 242 + return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; 243 + } 244 + 245 + static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, 246 + struct rxe_pkt_info *pkt, struct sk_buff *skb) 247 + { 248 + int err; 249 + int is_request = pkt->mask & RXE_REQ_MASK; 250 + 251 + if ((is_request && (qp->req.state != QP_STATE_READY)) || 252 + (!is_request && (qp->resp.state != QP_STATE_READY))) { 253 + pr_info("Packet dropped. QP is not in ready state\n"); 254 + goto drop; 255 + } 256 + 257 + if (pkt->mask & RXE_LOOPBACK_MASK) { 258 + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); 259 + err = rxe->ifc_ops->loopback(skb); 260 + } else { 261 + err = rxe->ifc_ops->send(rxe, pkt, skb); 262 + } 263 + 264 + if (err) { 265 + rxe->xmit_errors++; 266 + return err; 267 + } 268 + 269 + atomic_inc(&qp->skb_out); 270 + 271 + if ((qp_type(qp) != IB_QPT_RC) && 272 + (pkt->mask & RXE_END_MASK)) { 273 + pkt->wqe->state = wqe_state_done; 274 + rxe_run_task(&qp->comp.task, 1); 275 + } 276 + 277 + goto done; 278 + 279 + drop: 280 + kfree_skb(skb); 281 + err = 0; 282 + done: 283 + return err; 284 + } 285 + 286 + #endif /* RXE_LOC_H */
+190
drivers/infiniband/sw/rxe/rxe_mcast.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, 38 + struct rxe_mc_grp **grp_p) 39 + { 40 + int err; 41 + struct rxe_mc_grp *grp; 42 + 43 + if (rxe->attr.max_mcast_qp_attach == 0) { 44 + err = -EINVAL; 45 + goto err1; 46 + } 47 + 48 + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); 49 + if (grp) 50 + goto done; 51 + 52 + grp = rxe_alloc(&rxe->mc_grp_pool); 53 + if (!grp) { 54 + err = -ENOMEM; 55 + goto err1; 56 + } 57 + 58 + INIT_LIST_HEAD(&grp->qp_list); 59 + spin_lock_init(&grp->mcg_lock); 60 + grp->rxe = rxe; 61 + 62 + rxe_add_key(grp, mgid); 63 + 64 + err = rxe->ifc_ops->mcast_add(rxe, mgid); 65 + if (err) 66 + goto err2; 67 + 68 + done: 69 + *grp_p = grp; 70 + return 0; 71 + 72 + err2: 73 + rxe_drop_ref(grp); 74 + err1: 75 + return err; 76 + } 77 + 78 + int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, 79 + struct rxe_mc_grp *grp) 80 + { 81 + int err; 82 + struct rxe_mc_elem *elem; 83 + 84 + /* check to see of the qp is already a member of the group */ 85 + spin_lock_bh(&qp->grp_lock); 86 + spin_lock_bh(&grp->mcg_lock); 87 + list_for_each_entry(elem, &grp->qp_list, qp_list) { 88 + if (elem->qp == qp) { 89 + err = 0; 90 + goto out; 91 + } 92 + } 93 + 94 + if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { 95 + err = -ENOMEM; 96 + goto out; 97 + } 98 + 99 + elem = rxe_alloc(&rxe->mc_elem_pool); 100 + if (!elem) { 101 + err = -ENOMEM; 102 + goto out; 103 + } 104 + 105 + /* each qp holds a ref on the grp */ 106 + rxe_add_ref(grp); 107 + 108 + grp->num_qp++; 109 + elem->qp = qp; 110 + elem->grp = grp; 111 + 112 + list_add(&elem->qp_list, &grp->qp_list); 113 + list_add(&elem->grp_list, &qp->grp_list); 114 + 115 + err = 0; 116 + out: 117 + spin_unlock_bh(&grp->mcg_lock); 118 + spin_unlock_bh(&qp->grp_lock); 119 + return err; 120 + } 121 + 122 + int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, 123 + union ib_gid *mgid) 124 + { 125 + struct 
rxe_mc_grp *grp; 126 + struct rxe_mc_elem *elem, *tmp; 127 + 128 + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); 129 + if (!grp) 130 + goto err1; 131 + 132 + spin_lock_bh(&qp->grp_lock); 133 + spin_lock_bh(&grp->mcg_lock); 134 + 135 + list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { 136 + if (elem->qp == qp) { 137 + list_del(&elem->qp_list); 138 + list_del(&elem->grp_list); 139 + grp->num_qp--; 140 + 141 + spin_unlock_bh(&grp->mcg_lock); 142 + spin_unlock_bh(&qp->grp_lock); 143 + rxe_drop_ref(elem); 144 + rxe_drop_ref(grp); /* ref held by QP */ 145 + rxe_drop_ref(grp); /* ref from get_key */ 146 + return 0; 147 + } 148 + } 149 + 150 + spin_unlock_bh(&grp->mcg_lock); 151 + spin_unlock_bh(&qp->grp_lock); 152 + rxe_drop_ref(grp); /* ref from get_key */ 153 + err1: 154 + return -EINVAL; 155 + } 156 + 157 + void rxe_drop_all_mcast_groups(struct rxe_qp *qp) 158 + { 159 + struct rxe_mc_grp *grp; 160 + struct rxe_mc_elem *elem; 161 + 162 + while (1) { 163 + spin_lock_bh(&qp->grp_lock); 164 + if (list_empty(&qp->grp_list)) { 165 + spin_unlock_bh(&qp->grp_lock); 166 + break; 167 + } 168 + elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, 169 + grp_list); 170 + list_del(&elem->grp_list); 171 + spin_unlock_bh(&qp->grp_lock); 172 + 173 + grp = elem->grp; 174 + spin_lock_bh(&grp->mcg_lock); 175 + list_del(&elem->qp_list); 176 + grp->num_qp--; 177 + spin_unlock_bh(&grp->mcg_lock); 178 + rxe_drop_ref(grp); 179 + rxe_drop_ref(elem); 180 + } 181 + } 182 + 183 + void rxe_mc_cleanup(void *arg) 184 + { 185 + struct rxe_mc_grp *grp = arg; 186 + struct rxe_dev *rxe = grp->rxe; 187 + 188 + rxe_drop_key(grp); 189 + rxe->ifc_ops->mcast_delete(rxe, &grp->mgid); 190 + }
+173
drivers/infiniband/sw/rxe/rxe_mmap.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/module.h> 35 + #include <linux/vmalloc.h> 36 + #include <linux/mm.h> 37 + #include <linux/errno.h> 38 + #include <asm/pgtable.h> 39 + 40 + #include "rxe.h" 41 + #include "rxe_loc.h" 42 + #include "rxe_queue.h" 43 + 44 + void rxe_mmap_release(struct kref *ref) 45 + { 46 + struct rxe_mmap_info *ip = container_of(ref, 47 + struct rxe_mmap_info, ref); 48 + struct rxe_dev *rxe = to_rdev(ip->context->device); 49 + 50 + spin_lock_bh(&rxe->pending_lock); 51 + 52 + if (!list_empty(&ip->pending_mmaps)) 53 + list_del(&ip->pending_mmaps); 54 + 55 + spin_unlock_bh(&rxe->pending_lock); 56 + 57 + vfree(ip->obj); /* buf */ 58 + kfree(ip); 59 + } 60 + 61 + /* 62 + * open and close keep track of how many times the memory region is mapped, 63 + * to avoid releasing it. 64 + */ 65 + static void rxe_vma_open(struct vm_area_struct *vma) 66 + { 67 + struct rxe_mmap_info *ip = vma->vm_private_data; 68 + 69 + kref_get(&ip->ref); 70 + } 71 + 72 + static void rxe_vma_close(struct vm_area_struct *vma) 73 + { 74 + struct rxe_mmap_info *ip = vma->vm_private_data; 75 + 76 + kref_put(&ip->ref, rxe_mmap_release); 77 + } 78 + 79 + static struct vm_operations_struct rxe_vm_ops = { 80 + .open = rxe_vma_open, 81 + .close = rxe_vma_close, 82 + }; 83 + 84 + /** 85 + * rxe_mmap - create a new mmap region 86 + * @context: the IB user context of the process making the mmap() call 87 + * @vma: the VMA to be initialized 88 + * Return zero if the mmap is OK. Otherwise, return an errno. 89 + */ 90 + int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 91 + { 92 + struct rxe_dev *rxe = to_rdev(context->device); 93 + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 94 + unsigned long size = vma->vm_end - vma->vm_start; 95 + struct rxe_mmap_info *ip, *pp; 96 + int ret; 97 + 98 + /* 99 + * Search the device's list of objects waiting for a mmap call. 
100 + * Normally, this list is very short since a call to create a 101 + * CQ, QP, or SRQ is soon followed by a call to mmap(). 102 + */ 103 + spin_lock_bh(&rxe->pending_lock); 104 + list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) { 105 + if (context != ip->context || (__u64)offset != ip->info.offset) 106 + continue; 107 + 108 + /* Don't allow a mmap larger than the object. */ 109 + if (size > ip->info.size) { 110 + pr_err("mmap region is larger than the object!\n"); 111 + spin_unlock_bh(&rxe->pending_lock); 112 + ret = -EINVAL; 113 + goto done; 114 + } 115 + 116 + goto found_it; 117 + } 118 + pr_warn("unable to find pending mmap info\n"); 119 + spin_unlock_bh(&rxe->pending_lock); 120 + ret = -EINVAL; 121 + goto done; 122 + 123 + found_it: 124 + list_del_init(&ip->pending_mmaps); 125 + spin_unlock_bh(&rxe->pending_lock); 126 + 127 + ret = remap_vmalloc_range(vma, ip->obj, 0); 128 + if (ret) { 129 + pr_err("rxe: err %d from remap_vmalloc_range\n", ret); 130 + goto done; 131 + } 132 + 133 + vma->vm_ops = &rxe_vm_ops; 134 + vma->vm_private_data = ip; 135 + rxe_vma_open(vma); 136 + done: 137 + return ret; 138 + } 139 + 140 + /* 141 + * Allocate information for rxe_mmap 142 + */ 143 + struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, 144 + u32 size, 145 + struct ib_ucontext *context, 146 + void *obj) 147 + { 148 + struct rxe_mmap_info *ip; 149 + 150 + ip = kmalloc(sizeof(*ip), GFP_KERNEL); 151 + if (!ip) 152 + return NULL; 153 + 154 + size = PAGE_ALIGN(size); 155 + 156 + spin_lock_bh(&rxe->mmap_offset_lock); 157 + 158 + if (rxe->mmap_offset == 0) 159 + rxe->mmap_offset = PAGE_SIZE; 160 + 161 + ip->info.offset = rxe->mmap_offset; 162 + rxe->mmap_offset += size; 163 + 164 + spin_unlock_bh(&rxe->mmap_offset_lock); 165 + 166 + INIT_LIST_HEAD(&ip->pending_mmaps); 167 + ip->info.size = size; 168 + ip->context = context; 169 + ip->obj = obj; 170 + kref_init(&ip->ref); 171 + 172 + return ip; 173 + }
+643
drivers/infiniband/sw/rxe/rxe_mr.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + /* 38 + * lfsr (linear feedback shift register) with period 255 39 + */ 40 + static u8 rxe_get_key(void) 41 + { 42 + static unsigned key = 1; 43 + 44 + key = key << 1; 45 + 46 + key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) 47 + ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); 48 + 49 + key &= 0xff; 50 + 51 + return key; 52 + } 53 + 54 + int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) 55 + { 56 + switch (mem->type) { 57 + case RXE_MEM_TYPE_DMA: 58 + return 0; 59 + 60 + case RXE_MEM_TYPE_MR: 61 + case RXE_MEM_TYPE_FMR: 62 + return ((iova < mem->iova) || 63 + ((iova + length) > (mem->iova + mem->length))) ? 64 + -EFAULT : 0; 65 + 66 + default: 67 + return -EFAULT; 68 + } 69 + } 70 + 71 + #define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ 72 + | IB_ACCESS_REMOTE_WRITE \ 73 + | IB_ACCESS_REMOTE_ATOMIC) 74 + 75 + static void rxe_mem_init(int access, struct rxe_mem *mem) 76 + { 77 + u32 lkey = mem->pelem.index << 8 | rxe_get_key(); 78 + u32 rkey = (access & IB_ACCESS_REMOTE) ? 
lkey : 0; 79 + 80 + if (mem->pelem.pool->type == RXE_TYPE_MR) { 81 + mem->ibmr.lkey = lkey; 82 + mem->ibmr.rkey = rkey; 83 + } 84 + 85 + mem->lkey = lkey; 86 + mem->rkey = rkey; 87 + mem->state = RXE_MEM_STATE_INVALID; 88 + mem->type = RXE_MEM_TYPE_NONE; 89 + mem->map_shift = ilog2(RXE_BUF_PER_MAP); 90 + } 91 + 92 + void rxe_mem_cleanup(void *arg) 93 + { 94 + struct rxe_mem *mem = arg; 95 + int i; 96 + 97 + if (mem->umem) 98 + ib_umem_release(mem->umem); 99 + 100 + if (mem->map) { 101 + for (i = 0; i < mem->num_map; i++) 102 + kfree(mem->map[i]); 103 + 104 + kfree(mem->map); 105 + } 106 + } 107 + 108 + static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) 109 + { 110 + int i; 111 + int num_map; 112 + struct rxe_map **map = mem->map; 113 + 114 + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 115 + 116 + mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 117 + if (!mem->map) 118 + goto err1; 119 + 120 + for (i = 0; i < num_map; i++) { 121 + mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 122 + if (!mem->map[i]) 123 + goto err2; 124 + } 125 + 126 + WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 127 + 128 + mem->map_shift = ilog2(RXE_BUF_PER_MAP); 129 + mem->map_mask = RXE_BUF_PER_MAP - 1; 130 + 131 + mem->num_buf = num_buf; 132 + mem->num_map = num_map; 133 + mem->max_buf = num_map * RXE_BUF_PER_MAP; 134 + 135 + return 0; 136 + 137 + err2: 138 + for (i--; i >= 0; i--) 139 + kfree(mem->map[i]); 140 + 141 + kfree(mem->map); 142 + err1: 143 + return -ENOMEM; 144 + } 145 + 146 + int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, 147 + int access, struct rxe_mem *mem) 148 + { 149 + rxe_mem_init(access, mem); 150 + 151 + mem->pd = pd; 152 + mem->access = access; 153 + mem->state = RXE_MEM_STATE_VALID; 154 + mem->type = RXE_MEM_TYPE_DMA; 155 + 156 + return 0; 157 + } 158 + 159 + int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, 160 + u64 length, u64 iova, int access, struct ib_udata *udata, 161 + 
struct rxe_mem *mem) 162 + { 163 + int entry; 164 + struct rxe_map **map; 165 + struct rxe_phys_buf *buf = NULL; 166 + struct ib_umem *umem; 167 + struct scatterlist *sg; 168 + int num_buf; 169 + void *vaddr; 170 + int err; 171 + 172 + umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); 173 + if (IS_ERR(umem)) { 174 + pr_warn("err %d from rxe_umem_get\n", 175 + (int)PTR_ERR(umem)); 176 + err = -EINVAL; 177 + goto err1; 178 + } 179 + 180 + mem->umem = umem; 181 + num_buf = umem->nmap; 182 + 183 + rxe_mem_init(access, mem); 184 + 185 + err = rxe_mem_alloc(rxe, mem, num_buf); 186 + if (err) { 187 + pr_warn("err %d from rxe_mem_alloc\n", err); 188 + ib_umem_release(umem); 189 + goto err1; 190 + } 191 + 192 + WARN_ON(!is_power_of_2(umem->page_size)); 193 + 194 + mem->page_shift = ilog2(umem->page_size); 195 + mem->page_mask = umem->page_size - 1; 196 + 197 + num_buf = 0; 198 + map = mem->map; 199 + if (length > 0) { 200 + buf = map[0]->buf; 201 + 202 + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 203 + vaddr = page_address(sg_page(sg)); 204 + if (!vaddr) { 205 + pr_warn("null vaddr\n"); 206 + err = -ENOMEM; 207 + goto err1; 208 + } 209 + 210 + buf->addr = (uintptr_t)vaddr; 211 + buf->size = umem->page_size; 212 + num_buf++; 213 + buf++; 214 + 215 + if (num_buf >= RXE_BUF_PER_MAP) { 216 + map++; 217 + buf = map[0]->buf; 218 + num_buf = 0; 219 + } 220 + } 221 + } 222 + 223 + mem->pd = pd; 224 + mem->umem = umem; 225 + mem->access = access; 226 + mem->length = length; 227 + mem->iova = iova; 228 + mem->va = start; 229 + mem->offset = ib_umem_offset(umem); 230 + mem->state = RXE_MEM_STATE_VALID; 231 + mem->type = RXE_MEM_TYPE_MR; 232 + 233 + return 0; 234 + 235 + err1: 236 + return err; 237 + } 238 + 239 + int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, 240 + int max_pages, struct rxe_mem *mem) 241 + { 242 + int err; 243 + 244 + rxe_mem_init(0, mem); 245 + 246 + /* In fastreg, we also set the rkey */ 247 + mem->ibmr.rkey = 
mem->ibmr.lkey; 248 + 249 + err = rxe_mem_alloc(rxe, mem, max_pages); 250 + if (err) 251 + goto err1; 252 + 253 + mem->pd = pd; 254 + mem->max_buf = max_pages; 255 + mem->state = RXE_MEM_STATE_FREE; 256 + mem->type = RXE_MEM_TYPE_MR; 257 + 258 + return 0; 259 + 260 + err1: 261 + return err; 262 + } 263 + 264 + static void lookup_iova( 265 + struct rxe_mem *mem, 266 + u64 iova, 267 + int *m_out, 268 + int *n_out, 269 + size_t *offset_out) 270 + { 271 + size_t offset = iova - mem->iova + mem->offset; 272 + int map_index; 273 + int buf_index; 274 + u64 length; 275 + 276 + if (likely(mem->page_shift)) { 277 + *offset_out = offset & mem->page_mask; 278 + offset >>= mem->page_shift; 279 + *n_out = offset & mem->map_mask; 280 + *m_out = offset >> mem->map_shift; 281 + } else { 282 + map_index = 0; 283 + buf_index = 0; 284 + 285 + length = mem->map[map_index]->buf[buf_index].size; 286 + 287 + while (offset >= length) { 288 + offset -= length; 289 + buf_index++; 290 + 291 + if (buf_index == RXE_BUF_PER_MAP) { 292 + map_index++; 293 + buf_index = 0; 294 + } 295 + length = mem->map[map_index]->buf[buf_index].size; 296 + } 297 + 298 + *m_out = map_index; 299 + *n_out = buf_index; 300 + *offset_out = offset; 301 + } 302 + } 303 + 304 + void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) 305 + { 306 + size_t offset; 307 + int m, n; 308 + void *addr; 309 + 310 + if (mem->state != RXE_MEM_STATE_VALID) { 311 + pr_warn("mem not in valid state\n"); 312 + addr = NULL; 313 + goto out; 314 + } 315 + 316 + if (!mem->map) { 317 + addr = (void *)(uintptr_t)iova; 318 + goto out; 319 + } 320 + 321 + if (mem_check_range(mem, iova, length)) { 322 + pr_warn("range violation\n"); 323 + addr = NULL; 324 + goto out; 325 + } 326 + 327 + lookup_iova(mem, iova, &m, &n, &offset); 328 + 329 + if (offset + length > mem->map[m]->buf[n].size) { 330 + pr_warn("crosses page boundary\n"); 331 + addr = NULL; 332 + goto out; 333 + } 334 + 335 + addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + 
offset; 336 + 337 + out: 338 + return addr; 339 + } 340 + 341 + /* copy data from a range (vaddr, vaddr+length-1) to or from 342 + * a mem object starting at iova. Compute incremental value of 343 + * crc32 if crcp is not zero. caller must hold a reference to mem 344 + */ 345 + int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, 346 + enum copy_direction dir, u32 *crcp) 347 + { 348 + int err; 349 + int bytes; 350 + u8 *va; 351 + struct rxe_map **map; 352 + struct rxe_phys_buf *buf; 353 + int m; 354 + int i; 355 + size_t offset; 356 + u32 crc = crcp ? (*crcp) : 0; 357 + 358 + if (mem->type == RXE_MEM_TYPE_DMA) { 359 + u8 *src, *dest; 360 + 361 + src = (dir == to_mem_obj) ? 362 + addr : ((void *)(uintptr_t)iova); 363 + 364 + dest = (dir == to_mem_obj) ? 365 + ((void *)(uintptr_t)iova) : addr; 366 + 367 + if (crcp) 368 + *crcp = crc32_le(*crcp, src, length); 369 + 370 + memcpy(dest, src, length); 371 + 372 + return 0; 373 + } 374 + 375 + WARN_ON(!mem->map); 376 + 377 + err = mem_check_range(mem, iova, length); 378 + if (err) { 379 + err = -EFAULT; 380 + goto err1; 381 + } 382 + 383 + lookup_iova(mem, iova, &m, &i, &offset); 384 + 385 + map = mem->map + m; 386 + buf = map[0]->buf + i; 387 + 388 + while (length > 0) { 389 + u8 *src, *dest; 390 + 391 + va = (u8 *)(uintptr_t)buf->addr + offset; 392 + src = (dir == to_mem_obj) ? addr : va; 393 + dest = (dir == to_mem_obj) ? va : addr; 394 + 395 + bytes = buf->size - offset; 396 + 397 + if (bytes > length) 398 + bytes = length; 399 + 400 + if (crcp) 401 + crc = crc32_le(crc, src, bytes); 402 + 403 + memcpy(dest, src, bytes); 404 + 405 + length -= bytes; 406 + addr += bytes; 407 + 408 + offset = 0; 409 + buf++; 410 + i++; 411 + 412 + if (i == RXE_BUF_PER_MAP) { 413 + i = 0; 414 + map++; 415 + buf = map[0]->buf; 416 + } 417 + } 418 + 419 + if (crcp) 420 + *crcp = crc; 421 + 422 + return 0; 423 + 424 + err1: 425 + return err; 426 + } 427 + 428 + /* copy data in or out of a wqe, i.e. 
sg list 429 + * under the control of a dma descriptor 430 + */ 431 + int copy_data( 432 + struct rxe_dev *rxe, 433 + struct rxe_pd *pd, 434 + int access, 435 + struct rxe_dma_info *dma, 436 + void *addr, 437 + int length, 438 + enum copy_direction dir, 439 + u32 *crcp) 440 + { 441 + int bytes; 442 + struct rxe_sge *sge = &dma->sge[dma->cur_sge]; 443 + int offset = dma->sge_offset; 444 + int resid = dma->resid; 445 + struct rxe_mem *mem = NULL; 446 + u64 iova; 447 + int err; 448 + 449 + if (length == 0) 450 + return 0; 451 + 452 + if (length > resid) { 453 + err = -EINVAL; 454 + goto err2; 455 + } 456 + 457 + if (sge->length && (offset < sge->length)) { 458 + mem = lookup_mem(pd, access, sge->lkey, lookup_local); 459 + if (!mem) { 460 + err = -EINVAL; 461 + goto err1; 462 + } 463 + } 464 + 465 + while (length > 0) { 466 + bytes = length; 467 + 468 + if (offset >= sge->length) { 469 + if (mem) { 470 + rxe_drop_ref(mem); 471 + mem = NULL; 472 + } 473 + sge++; 474 + dma->cur_sge++; 475 + offset = 0; 476 + 477 + if (dma->cur_sge >= dma->num_sge) { 478 + err = -ENOSPC; 479 + goto err2; 480 + } 481 + 482 + if (sge->length) { 483 + mem = lookup_mem(pd, access, sge->lkey, 484 + lookup_local); 485 + if (!mem) { 486 + err = -EINVAL; 487 + goto err1; 488 + } 489 + } else { 490 + continue; 491 + } 492 + } 493 + 494 + if (bytes > sge->length - offset) 495 + bytes = sge->length - offset; 496 + 497 + if (bytes > 0) { 498 + iova = sge->addr + offset; 499 + 500 + err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); 501 + if (err) 502 + goto err2; 503 + 504 + offset += bytes; 505 + resid -= bytes; 506 + length -= bytes; 507 + addr += bytes; 508 + } 509 + } 510 + 511 + dma->sge_offset = offset; 512 + dma->resid = resid; 513 + 514 + if (mem) 515 + rxe_drop_ref(mem); 516 + 517 + return 0; 518 + 519 + err2: 520 + if (mem) 521 + rxe_drop_ref(mem); 522 + err1: 523 + return err; 524 + } 525 + 526 + int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) 527 + { 528 + struct 
rxe_sge *sge = &dma->sge[dma->cur_sge]; 529 + int offset = dma->sge_offset; 530 + int resid = dma->resid; 531 + 532 + while (length) { 533 + unsigned int bytes; 534 + 535 + if (offset >= sge->length) { 536 + sge++; 537 + dma->cur_sge++; 538 + offset = 0; 539 + if (dma->cur_sge >= dma->num_sge) 540 + return -ENOSPC; 541 + } 542 + 543 + bytes = length; 544 + 545 + if (bytes > sge->length - offset) 546 + bytes = sge->length - offset; 547 + 548 + offset += bytes; 549 + resid -= bytes; 550 + length -= bytes; 551 + } 552 + 553 + dma->sge_offset = offset; 554 + dma->resid = resid; 555 + 556 + return 0; 557 + } 558 + 559 + /* (1) find the mem (mr or mw) corresponding to lkey/rkey 560 + * depending on lookup_type 561 + * (2) verify that the (qp) pd matches the mem pd 562 + * (3) verify that the mem can support the requested access 563 + * (4) verify that mem state is valid 564 + */ 565 + struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, 566 + enum lookup_type type) 567 + { 568 + struct rxe_mem *mem; 569 + struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 570 + int index = key >> 8; 571 + 572 + if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { 573 + mem = rxe_pool_get_index(&rxe->mr_pool, index); 574 + if (!mem) 575 + goto err1; 576 + } else { 577 + goto err1; 578 + } 579 + 580 + if ((type == lookup_local && mem->lkey != key) || 581 + (type == lookup_remote && mem->rkey != key)) 582 + goto err2; 583 + 584 + if (mem->pd != pd) 585 + goto err2; 586 + 587 + if (access && !(access & mem->access)) 588 + goto err2; 589 + 590 + if (mem->state != RXE_MEM_STATE_VALID) 591 + goto err2; 592 + 593 + return mem; 594 + 595 + err2: 596 + rxe_drop_ref(mem); 597 + err1: 598 + return NULL; 599 + } 600 + 601 + int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 602 + u64 *page, int num_pages, u64 iova) 603 + { 604 + int i; 605 + int num_buf; 606 + int err; 607 + struct rxe_map **map; 608 + struct rxe_phys_buf *buf; 609 + int page_size; 610 + 611 + if 
(num_pages > mem->max_buf) { 612 + err = -EINVAL; 613 + goto err1; 614 + } 615 + 616 + num_buf = 0; 617 + page_size = 1 << mem->page_shift; 618 + map = mem->map; 619 + buf = map[0]->buf; 620 + 621 + for (i = 0; i < num_pages; i++) { 622 + buf->addr = *page++; 623 + buf->size = page_size; 624 + buf++; 625 + num_buf++; 626 + 627 + if (num_buf == RXE_BUF_PER_MAP) { 628 + map++; 629 + buf = map[0]->buf; 630 + num_buf = 0; 631 + } 632 + } 633 + 634 + mem->iova = iova; 635 + mem->va = iova; 636 + mem->length = num_pages << mem->page_shift; 637 + mem->state = RXE_MEM_STATE_VALID; 638 + 639 + return 0; 640 + 641 + err1: 642 + return err; 643 + }
+708
drivers/infiniband/sw/rxe/rxe_net.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/skbuff.h> 35 + #include <linux/if_arp.h> 36 + #include <linux/netdevice.h> 37 + #include <linux/if.h> 38 + #include <linux/if_vlan.h> 39 + #include <net/udp_tunnel.h> 40 + #include <net/sch_generic.h> 41 + #include <linux/netfilter.h> 42 + #include <rdma/ib_addr.h> 43 + 44 + #include "rxe.h" 45 + #include "rxe_net.h" 46 + #include "rxe_loc.h" 47 + 48 + static LIST_HEAD(rxe_dev_list); 49 + static spinlock_t dev_list_lock; /* spinlock for device list */ 50 + 51 + struct rxe_dev *net_to_rxe(struct net_device *ndev) 52 + { 53 + struct rxe_dev *rxe; 54 + struct rxe_dev *found = NULL; 55 + 56 + spin_lock_bh(&dev_list_lock); 57 + list_for_each_entry(rxe, &rxe_dev_list, list) { 58 + if (rxe->ndev == ndev) { 59 + found = rxe; 60 + break; 61 + } 62 + } 63 + spin_unlock_bh(&dev_list_lock); 64 + 65 + return found; 66 + } 67 + 68 + struct rxe_dev *get_rxe_by_name(const char* name) 69 + { 70 + struct rxe_dev *rxe; 71 + struct rxe_dev *found = NULL; 72 + 73 + spin_lock_bh(&dev_list_lock); 74 + list_for_each_entry(rxe, &rxe_dev_list, list) { 75 + if (!strcmp(name, rxe->ib_dev.name)) { 76 + found = rxe; 77 + break; 78 + } 79 + } 80 + spin_unlock_bh(&dev_list_lock); 81 + return found; 82 + } 83 + 84 + 85 + struct rxe_recv_sockets recv_sockets; 86 + 87 + static __be64 rxe_mac_to_eui64(struct net_device *ndev) 88 + { 89 + unsigned char *mac_addr = ndev->dev_addr; 90 + __be64 eui64; 91 + unsigned char *dst = (unsigned char *)&eui64; 92 + 93 + dst[0] = mac_addr[0] ^ 2; 94 + dst[1] = mac_addr[1]; 95 + dst[2] = mac_addr[2]; 96 + dst[3] = 0xff; 97 + dst[4] = 0xfe; 98 + dst[5] = mac_addr[3]; 99 + dst[6] = mac_addr[4]; 100 + dst[7] = mac_addr[5]; 101 + 102 + return eui64; 103 + } 104 + 105 + static __be64 node_guid(struct rxe_dev *rxe) 106 + { 107 + return rxe_mac_to_eui64(rxe->ndev); 108 + } 109 + 110 + static __be64 port_guid(struct rxe_dev *rxe) 111 + { 112 + return rxe_mac_to_eui64(rxe->ndev); 113 + } 114 + 115 + static struct device *dma_device(struct 
rxe_dev *rxe) 116 + { 117 + struct net_device *ndev; 118 + 119 + ndev = rxe->ndev; 120 + 121 + if (ndev->priv_flags & IFF_802_1Q_VLAN) 122 + ndev = vlan_dev_real_dev(ndev); 123 + 124 + return ndev->dev.parent; 125 + } 126 + 127 + static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) 128 + { 129 + int err; 130 + unsigned char ll_addr[ETH_ALEN]; 131 + 132 + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 133 + err = dev_mc_add(rxe->ndev, ll_addr); 134 + 135 + return err; 136 + } 137 + 138 + static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) 139 + { 140 + int err; 141 + unsigned char ll_addr[ETH_ALEN]; 142 + 143 + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 144 + err = dev_mc_del(rxe->ndev, ll_addr); 145 + 146 + return err; 147 + } 148 + 149 + static struct dst_entry *rxe_find_route4(struct net_device *ndev, 150 + struct in_addr *saddr, 151 + struct in_addr *daddr) 152 + { 153 + struct rtable *rt; 154 + struct flowi4 fl = { { 0 } }; 155 + 156 + memset(&fl, 0, sizeof(fl)); 157 + fl.flowi4_oif = ndev->ifindex; 158 + memcpy(&fl.saddr, saddr, sizeof(*saddr)); 159 + memcpy(&fl.daddr, daddr, sizeof(*daddr)); 160 + fl.flowi4_proto = IPPROTO_UDP; 161 + 162 + rt = ip_route_output_key(&init_net, &fl); 163 + if (IS_ERR(rt)) { 164 + pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); 165 + return NULL; 166 + } 167 + 168 + return &rt->dst; 169 + } 170 + 171 + #if IS_ENABLED(CONFIG_IPV6) 172 + static struct dst_entry *rxe_find_route6(struct net_device *ndev, 173 + struct in6_addr *saddr, 174 + struct in6_addr *daddr) 175 + { 176 + struct dst_entry *ndst; 177 + struct flowi6 fl6 = { { 0 } }; 178 + 179 + memset(&fl6, 0, sizeof(fl6)); 180 + fl6.flowi6_oif = ndev->ifindex; 181 + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 182 + memcpy(&fl6.daddr, daddr, sizeof(*daddr)); 183 + fl6.flowi6_proto = IPPROTO_UDP; 184 + 185 + if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk), 186 + recv_sockets.sk6->sk, &ndst, &fl6))) { 187 + 
pr_err_ratelimited("no route to %pI6\n", daddr); 188 + goto put; 189 + } 190 + 191 + if (unlikely(ndst->error)) { 192 + pr_err("no route to %pI6\n", daddr); 193 + goto put; 194 + } 195 + 196 + return ndst; 197 + put: 198 + dst_release(ndst); 199 + return NULL; 200 + } 201 + 202 + #else 203 + 204 + static struct dst_entry *rxe_find_route6(struct net_device *ndev, 205 + struct in6_addr *saddr, 206 + struct in6_addr *daddr) 207 + { 208 + return NULL; 209 + } 210 + 211 + #endif 212 + 213 + static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 214 + { 215 + struct udphdr *udph; 216 + struct net_device *ndev = skb->dev; 217 + struct rxe_dev *rxe = net_to_rxe(ndev); 218 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 219 + 220 + if (!rxe) 221 + goto drop; 222 + 223 + if (skb_linearize(skb)) { 224 + pr_err("skb_linearize failed\n"); 225 + goto drop; 226 + } 227 + 228 + udph = udp_hdr(skb); 229 + pkt->rxe = rxe; 230 + pkt->port_num = 1; 231 + pkt->hdr = (u8 *)(udph + 1); 232 + pkt->mask = RXE_GRH_MASK; 233 + pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); 234 + 235 + return rxe_rcv(skb); 236 + drop: 237 + kfree_skb(skb); 238 + return 0; 239 + } 240 + 241 + static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, 242 + bool ipv6) 243 + { 244 + int err; 245 + struct socket *sock; 246 + struct udp_port_cfg udp_cfg; 247 + struct udp_tunnel_sock_cfg tnl_cfg; 248 + 249 + memset(&udp_cfg, 0, sizeof(udp_cfg)); 250 + 251 + if (ipv6) { 252 + udp_cfg.family = AF_INET6; 253 + udp_cfg.ipv6_v6only = 1; 254 + } else { 255 + udp_cfg.family = AF_INET; 256 + } 257 + 258 + udp_cfg.local_udp_port = port; 259 + 260 + /* Create UDP socket */ 261 + err = udp_sock_create(net, &udp_cfg, &sock); 262 + if (err < 0) { 263 + pr_err("failed to create udp socket. 
err = %d\n", err); 264 + return ERR_PTR(err); 265 + } 266 + 267 + tnl_cfg.sk_user_data = NULL; 268 + tnl_cfg.encap_type = 1; 269 + tnl_cfg.encap_rcv = rxe_udp_encap_recv; 270 + tnl_cfg.encap_destroy = NULL; 271 + 272 + /* Setup UDP tunnel */ 273 + setup_udp_tunnel_sock(net, sock, &tnl_cfg); 274 + 275 + return sock; 276 + } 277 + 278 + static void rxe_release_udp_tunnel(struct socket *sk) 279 + { 280 + udp_tunnel_sock_release(sk); 281 + } 282 + 283 + static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, 284 + __be16 dst_port) 285 + { 286 + struct udphdr *udph; 287 + 288 + __skb_push(skb, sizeof(*udph)); 289 + skb_reset_transport_header(skb); 290 + udph = udp_hdr(skb); 291 + 292 + udph->dest = dst_port; 293 + udph->source = src_port; 294 + udph->len = htons(skb->len); 295 + udph->check = 0; 296 + } 297 + 298 + static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, 299 + __be32 saddr, __be32 daddr, __u8 proto, 300 + __u8 tos, __u8 ttl, __be16 df, bool xnet) 301 + { 302 + struct iphdr *iph; 303 + 304 + skb_scrub_packet(skb, xnet); 305 + 306 + skb_clear_hash(skb); 307 + skb_dst_set(skb, dst); 308 + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 309 + 310 + skb_push(skb, sizeof(struct iphdr)); 311 + skb_reset_network_header(skb); 312 + 313 + iph = ip_hdr(skb); 314 + 315 + iph->version = IPVERSION; 316 + iph->ihl = sizeof(struct iphdr) >> 2; 317 + iph->frag_off = df; 318 + iph->protocol = proto; 319 + iph->tos = tos; 320 + iph->daddr = daddr; 321 + iph->saddr = saddr; 322 + iph->ttl = ttl; 323 + __ip_select_ident(dev_net(dst->dev), iph, 324 + skb_shinfo(skb)->gso_segs ?: 1); 325 + iph->tot_len = htons(skb->len); 326 + ip_send_check(iph); 327 + } 328 + 329 + static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, 330 + struct in6_addr *saddr, struct in6_addr *daddr, 331 + __u8 proto, __u8 prio, __u8 ttl) 332 + { 333 + struct ipv6hdr *ip6h; 334 + 335 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 336 + IPCB(skb)->flags &= 
~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED 337 + | IPSKB_REROUTED); 338 + skb_dst_set(skb, dst); 339 + 340 + __skb_push(skb, sizeof(*ip6h)); 341 + skb_reset_network_header(skb); 342 + ip6h = ipv6_hdr(skb); 343 + ip6_flow_hdr(ip6h, prio, htonl(0)); 344 + ip6h->payload_len = htons(skb->len); 345 + ip6h->nexthdr = proto; 346 + ip6h->hop_limit = ttl; 347 + ip6h->daddr = *daddr; 348 + ip6h->saddr = *saddr; 349 + ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); 350 + } 351 + 352 + static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) 353 + { 354 + struct dst_entry *dst; 355 + bool xnet = false; 356 + __be16 df = htons(IP_DF); 357 + struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; 358 + struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; 359 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 360 + 361 + dst = rxe_find_route4(rxe->ndev, saddr, daddr); 362 + if (!dst) { 363 + pr_err("Host not reachable\n"); 364 + return -EHOSTUNREACH; 365 + } 366 + 367 + if (!memcmp(saddr, daddr, sizeof(*daddr))) 368 + pkt->mask |= RXE_LOOPBACK_MASK; 369 + 370 + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 371 + htons(ROCE_V2_UDP_DPORT)); 372 + 373 + prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, 374 + av->grh.traffic_class, av->grh.hop_limit, df, xnet); 375 + return 0; 376 + } 377 + 378 + static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) 379 + { 380 + struct dst_entry *dst; 381 + struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; 382 + struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; 383 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 384 + 385 + dst = rxe_find_route6(rxe->ndev, saddr, daddr); 386 + if (!dst) { 387 + pr_err("Host not reachable\n"); 388 + return -EHOSTUNREACH; 389 + } 390 + 391 + if (!memcmp(saddr, daddr, sizeof(*daddr))) 392 + pkt->mask |= RXE_LOOPBACK_MASK; 393 + 394 + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 395 + 
htons(ROCE_V2_UDP_DPORT)); 396 + 397 + prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, 398 + av->grh.traffic_class, 399 + av->grh.hop_limit); 400 + return 0; 401 + } 402 + 403 + static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 404 + struct sk_buff *skb, u32 *crc) 405 + { 406 + int err = 0; 407 + struct rxe_av *av = rxe_get_av(pkt); 408 + 409 + if (av->network_type == RDMA_NETWORK_IPV4) 410 + err = prepare4(rxe, skb, av); 411 + else if (av->network_type == RDMA_NETWORK_IPV6) 412 + err = prepare6(rxe, skb, av); 413 + 414 + *crc = rxe_icrc_hdr(pkt, skb); 415 + 416 + return err; 417 + } 418 + 419 + static void rxe_skb_tx_dtor(struct sk_buff *skb) 420 + { 421 + struct sock *sk = skb->sk; 422 + struct rxe_qp *qp = sk->sk_user_data; 423 + int skb_out = atomic_dec_return(&qp->skb_out); 424 + 425 + if (unlikely(qp->need_req_skb && 426 + skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) 427 + rxe_run_task(&qp->req.task, 1); 428 + } 429 + 430 + static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 431 + struct sk_buff *skb) 432 + { 433 + struct sk_buff *nskb; 434 + struct rxe_av *av; 435 + int err; 436 + 437 + av = rxe_get_av(pkt); 438 + 439 + nskb = skb_clone(skb, GFP_ATOMIC); 440 + if (!nskb) 441 + return -ENOMEM; 442 + 443 + nskb->destructor = rxe_skb_tx_dtor; 444 + nskb->sk = pkt->qp->sk->sk; 445 + 446 + if (av->network_type == RDMA_NETWORK_IPV4) { 447 + err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); 448 + } else if (av->network_type == RDMA_NETWORK_IPV6) { 449 + err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); 450 + } else { 451 + pr_err("Unknown layer 3 protocol: %d\n", av->network_type); 452 + kfree_skb(nskb); 453 + return -EINVAL; 454 + } 455 + 456 + if (unlikely(net_xmit_eval(err))) { 457 + pr_debug("error sending packet: %d\n", err); 458 + return -EAGAIN; 459 + } 460 + 461 + kfree_skb(skb); 462 + 463 + return 0; 464 + } 465 + 466 + static int loopback(struct sk_buff *skb) 467 + { 468 + return rxe_rcv(skb); 469 
+ } 470 + 471 + static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av) 472 + { 473 + return rxe->port.port_guid == av->grh.dgid.global.interface_id; 474 + } 475 + 476 + static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av, 477 + int paylen, struct rxe_pkt_info *pkt) 478 + { 479 + unsigned int hdr_len; 480 + struct sk_buff *skb; 481 + 482 + if (av->network_type == RDMA_NETWORK_IPV4) 483 + hdr_len = ETH_HLEN + sizeof(struct udphdr) + 484 + sizeof(struct iphdr); 485 + else 486 + hdr_len = ETH_HLEN + sizeof(struct udphdr) + 487 + sizeof(struct ipv6hdr); 488 + 489 + skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev), 490 + GFP_ATOMIC); 491 + if (unlikely(!skb)) 492 + return NULL; 493 + 494 + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); 495 + 496 + skb->dev = rxe->ndev; 497 + if (av->network_type == RDMA_NETWORK_IPV4) 498 + skb->protocol = htons(ETH_P_IP); 499 + else 500 + skb->protocol = htons(ETH_P_IPV6); 501 + 502 + pkt->rxe = rxe; 503 + pkt->port_num = 1; 504 + pkt->hdr = skb_put(skb, paylen); 505 + pkt->mask |= RXE_GRH_MASK; 506 + 507 + memset(pkt->hdr, 0, paylen); 508 + 509 + return skb; 510 + } 511 + 512 + /* 513 + * this is required by rxe_cfg to match rxe devices in 514 + * /sys/class/infiniband up with their underlying ethernet devices 515 + */ 516 + static char *parent_name(struct rxe_dev *rxe, unsigned int port_num) 517 + { 518 + return rxe->ndev->name; 519 + } 520 + 521 + static enum rdma_link_layer link_layer(struct rxe_dev *rxe, 522 + unsigned int port_num) 523 + { 524 + return IB_LINK_LAYER_ETHERNET; 525 + } 526 + 527 + static struct rxe_ifc_ops ifc_ops = { 528 + .node_guid = node_guid, 529 + .port_guid = port_guid, 530 + .dma_device = dma_device, 531 + .mcast_add = mcast_add, 532 + .mcast_delete = mcast_delete, 533 + .prepare = prepare, 534 + .send = send, 535 + .loopback = loopback, 536 + .init_packet = init_packet, 537 + .parent_name = parent_name, 538 + .link_layer = link_layer, 539 + }; 540 
+ 541 + struct rxe_dev *rxe_net_add(struct net_device *ndev) 542 + { 543 + int err; 544 + struct rxe_dev *rxe = NULL; 545 + 546 + rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe)); 547 + if (!rxe) 548 + return NULL; 549 + 550 + rxe->ifc_ops = &ifc_ops; 551 + rxe->ndev = ndev; 552 + 553 + err = rxe_add(rxe, ndev->mtu); 554 + if (err) { 555 + ib_dealloc_device(&rxe->ib_dev); 556 + return NULL; 557 + } 558 + 559 + spin_lock_bh(&dev_list_lock); 560 + list_add_tail(&rxe_dev_list, &rxe->list); 561 + spin_unlock_bh(&dev_list_lock); 562 + return rxe; 563 + } 564 + 565 + void rxe_remove_all(void) 566 + { 567 + spin_lock_bh(&dev_list_lock); 568 + while (!list_empty(&rxe_dev_list)) { 569 + struct rxe_dev *rxe = 570 + list_first_entry(&rxe_dev_list, struct rxe_dev, list); 571 + 572 + list_del(&rxe->list); 573 + spin_unlock_bh(&dev_list_lock); 574 + rxe_remove(rxe); 575 + spin_lock_bh(&dev_list_lock); 576 + } 577 + spin_unlock_bh(&dev_list_lock); 578 + } 579 + EXPORT_SYMBOL(rxe_remove_all); 580 + 581 + static void rxe_port_event(struct rxe_dev *rxe, 582 + enum ib_event_type event) 583 + { 584 + struct ib_event ev; 585 + 586 + ev.device = &rxe->ib_dev; 587 + ev.element.port_num = 1; 588 + ev.event = event; 589 + 590 + ib_dispatch_event(&ev); 591 + } 592 + 593 + /* Caller must hold net_info_lock */ 594 + void rxe_port_up(struct rxe_dev *rxe) 595 + { 596 + struct rxe_port *port; 597 + 598 + port = &rxe->port; 599 + port->attr.state = IB_PORT_ACTIVE; 600 + port->attr.phys_state = IB_PHYS_STATE_LINK_UP; 601 + 602 + rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); 603 + pr_info("rxe: set %s active\n", rxe->ib_dev.name); 604 + return; 605 + } 606 + 607 + /* Caller must hold net_info_lock */ 608 + void rxe_port_down(struct rxe_dev *rxe) 609 + { 610 + struct rxe_port *port; 611 + 612 + port = &rxe->port; 613 + port->attr.state = IB_PORT_DOWN; 614 + port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; 615 + 616 + rxe_port_event(rxe, IB_EVENT_PORT_ERR); 617 + pr_info("rxe: set %s down\n", 
rxe->ib_dev.name); 618 + return; 619 + } 620 + 621 + static int rxe_notify(struct notifier_block *not_blk, 622 + unsigned long event, 623 + void *arg) 624 + { 625 + struct net_device *ndev = netdev_notifier_info_to_dev(arg); 626 + struct rxe_dev *rxe = net_to_rxe(ndev); 627 + 628 + if (!rxe) 629 + goto out; 630 + 631 + switch (event) { 632 + case NETDEV_UNREGISTER: 633 + list_del(&rxe->list); 634 + rxe_remove(rxe); 635 + break; 636 + case NETDEV_UP: 637 + rxe_port_up(rxe); 638 + break; 639 + case NETDEV_DOWN: 640 + rxe_port_down(rxe); 641 + break; 642 + case NETDEV_CHANGEMTU: 643 + pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); 644 + rxe_set_mtu(rxe, ndev->mtu); 645 + break; 646 + case NETDEV_REBOOT: 647 + case NETDEV_CHANGE: 648 + case NETDEV_GOING_DOWN: 649 + case NETDEV_CHANGEADDR: 650 + case NETDEV_CHANGENAME: 651 + case NETDEV_FEAT_CHANGE: 652 + default: 653 + pr_info("rxe: ignoring netdev event = %ld for %s\n", 654 + event, ndev->name); 655 + break; 656 + } 657 + out: 658 + return NOTIFY_OK; 659 + } 660 + 661 + static struct notifier_block rxe_net_notifier = { 662 + .notifier_call = rxe_notify, 663 + }; 664 + 665 + int rxe_net_init(void) 666 + { 667 + int err; 668 + 669 + spin_lock_init(&dev_list_lock); 670 + 671 + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, 672 + htons(ROCE_V2_UDP_DPORT), true); 673 + if (IS_ERR(recv_sockets.sk6)) { 674 + recv_sockets.sk6 = NULL; 675 + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); 676 + return -1; 677 + } 678 + 679 + recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, 680 + htons(ROCE_V2_UDP_DPORT), false); 681 + if (IS_ERR(recv_sockets.sk4)) { 682 + rxe_release_udp_tunnel(recv_sockets.sk6); 683 + recv_sockets.sk4 = NULL; 684 + recv_sockets.sk6 = NULL; 685 + pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); 686 + return -1; 687 + } 688 + 689 + err = register_netdevice_notifier(&rxe_net_notifier); 690 + if (err) { 691 + rxe_release_udp_tunnel(recv_sockets.sk6); 692 + 
rxe_release_udp_tunnel(recv_sockets.sk4); 693 + pr_err("rxe: Failed to rigister netdev notifier\n"); 694 + } 695 + 696 + return err; 697 + } 698 + 699 + void rxe_net_exit(void) 700 + { 701 + if (recv_sockets.sk6) 702 + rxe_release_udp_tunnel(recv_sockets.sk6); 703 + 704 + if (recv_sockets.sk4) 705 + rxe_release_udp_tunnel(recv_sockets.sk4); 706 + 707 + unregister_netdevice_notifier(&rxe_net_notifier); 708 + }
+53
drivers/infiniband/sw/rxe/rxe_net.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #ifndef RXE_NET_H 35 + #define RXE_NET_H 36 + 37 + #include <net/sock.h> 38 + #include <net/if_inet6.h> 39 + #include <linux/module.h> 40 + 41 + struct rxe_recv_sockets { 42 + struct socket *sk4; 43 + struct socket *sk6; 44 + }; 45 + 46 + extern struct rxe_recv_sockets recv_sockets; 47 + 48 + struct rxe_dev *rxe_net_add(struct net_device *ndev); 49 + 50 + int rxe_net_init(void); 51 + void rxe_net_exit(void); 52 + 53 + #endif /* RXE_NET_H */
+961
drivers/infiniband/sw/rxe/rxe_opcode.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <rdma/ib_pack.h> 35 + #include "rxe_opcode.h" 36 + #include "rxe_hdr.h" 37 + 38 + /* useful information about work request opcodes and pkt opcodes in 39 + * table form 40 + */ 41 + struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { 42 + [IB_WR_RDMA_WRITE] = { 43 + .name = "IB_WR_RDMA_WRITE", 44 + .mask = { 45 + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, 46 + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, 47 + }, 48 + }, 49 + [IB_WR_RDMA_WRITE_WITH_IMM] = { 50 + .name = "IB_WR_RDMA_WRITE_WITH_IMM", 51 + .mask = { 52 + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, 53 + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, 54 + }, 55 + }, 56 + [IB_WR_SEND] = { 57 + .name = "IB_WR_SEND", 58 + .mask = { 59 + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, 60 + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, 61 + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, 62 + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, 63 + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, 64 + }, 65 + }, 66 + [IB_WR_SEND_WITH_IMM] = { 67 + .name = "IB_WR_SEND_WITH_IMM", 68 + .mask = { 69 + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, 70 + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, 71 + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, 72 + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, 73 + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, 74 + }, 75 + }, 76 + [IB_WR_RDMA_READ] = { 77 + .name = "IB_WR_RDMA_READ", 78 + .mask = { 79 + [IB_QPT_RC] = WR_READ_MASK, 80 + }, 81 + }, 82 + [IB_WR_ATOMIC_CMP_AND_SWP] = { 83 + .name = "IB_WR_ATOMIC_CMP_AND_SWP", 84 + .mask = { 85 + [IB_QPT_RC] = WR_ATOMIC_MASK, 86 + }, 87 + }, 88 + [IB_WR_ATOMIC_FETCH_AND_ADD] = { 89 + .name = "IB_WR_ATOMIC_FETCH_AND_ADD", 90 + .mask = { 91 + [IB_QPT_RC] = WR_ATOMIC_MASK, 92 + }, 93 + }, 94 + [IB_WR_LSO] = { 95 + .name = "IB_WR_LSO", 96 + .mask = { 97 + /* not supported */ 98 + }, 99 + }, 100 + [IB_WR_SEND_WITH_INV] = { 101 + .name = "IB_WR_SEND_WITH_INV", 102 + .mask = { 103 + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, 
104 + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, 105 + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, 106 + }, 107 + }, 108 + [IB_WR_RDMA_READ_WITH_INV] = { 109 + .name = "IB_WR_RDMA_READ_WITH_INV", 110 + .mask = { 111 + [IB_QPT_RC] = WR_READ_MASK, 112 + }, 113 + }, 114 + [IB_WR_LOCAL_INV] = { 115 + .name = "IB_WR_LOCAL_INV", 116 + .mask = { 117 + [IB_QPT_RC] = WR_REG_MASK, 118 + }, 119 + }, 120 + [IB_WR_REG_MR] = { 121 + .name = "IB_WR_REG_MR", 122 + .mask = { 123 + [IB_QPT_RC] = WR_REG_MASK, 124 + }, 125 + }, 126 + }; 127 + 128 + struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { 129 + [IB_OPCODE_RC_SEND_FIRST] = { 130 + .name = "IB_OPCODE_RC_SEND_FIRST", 131 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK 132 + | RXE_SEND_MASK | RXE_START_MASK, 133 + .length = RXE_BTH_BYTES, 134 + .offset = { 135 + [RXE_BTH] = 0, 136 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 137 + } 138 + }, 139 + [IB_OPCODE_RC_SEND_MIDDLE] = { 140 + .name = "IB_OPCODE_RC_SEND_MIDDLE]", 141 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK 142 + | RXE_MIDDLE_MASK, 143 + .length = RXE_BTH_BYTES, 144 + .offset = { 145 + [RXE_BTH] = 0, 146 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 147 + } 148 + }, 149 + [IB_OPCODE_RC_SEND_LAST] = { 150 + .name = "IB_OPCODE_RC_SEND_LAST", 151 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK 152 + | RXE_SEND_MASK | RXE_END_MASK, 153 + .length = RXE_BTH_BYTES, 154 + .offset = { 155 + [RXE_BTH] = 0, 156 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 157 + } 158 + }, 159 + [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { 160 + .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", 161 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 162 + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, 163 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, 164 + .offset = { 165 + [RXE_BTH] = 0, 166 + [RXE_IMMDT] = RXE_BTH_BYTES, 167 + [RXE_PAYLOAD] = RXE_BTH_BYTES 168 + + RXE_IMMDT_BYTES, 169 + } 170 + }, 171 + [IB_OPCODE_RC_SEND_ONLY] = { 172 + .name = "IB_OPCODE_RC_SEND_ONLY", 173 + .mask = 
RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK 174 + | RXE_RWR_MASK | RXE_SEND_MASK 175 + | RXE_START_MASK | RXE_END_MASK, 176 + .length = RXE_BTH_BYTES, 177 + .offset = { 178 + [RXE_BTH] = 0, 179 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 180 + } 181 + }, 182 + [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { 183 + .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", 184 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 185 + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK 186 + | RXE_START_MASK | RXE_END_MASK, 187 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, 188 + .offset = { 189 + [RXE_BTH] = 0, 190 + [RXE_IMMDT] = RXE_BTH_BYTES, 191 + [RXE_PAYLOAD] = RXE_BTH_BYTES 192 + + RXE_IMMDT_BYTES, 193 + } 194 + }, 195 + [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { 196 + .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", 197 + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 198 + | RXE_WRITE_MASK | RXE_START_MASK, 199 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, 200 + .offset = { 201 + [RXE_BTH] = 0, 202 + [RXE_RETH] = RXE_BTH_BYTES, 203 + [RXE_PAYLOAD] = RXE_BTH_BYTES 204 + + RXE_RETH_BYTES, 205 + } 206 + }, 207 + [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { 208 + .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", 209 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK 210 + | RXE_MIDDLE_MASK, 211 + .length = RXE_BTH_BYTES, 212 + .offset = { 213 + [RXE_BTH] = 0, 214 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 215 + } 216 + }, 217 + [IB_OPCODE_RC_RDMA_WRITE_LAST] = { 218 + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", 219 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK 220 + | RXE_END_MASK, 221 + .length = RXE_BTH_BYTES, 222 + .offset = { 223 + [RXE_BTH] = 0, 224 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 225 + } 226 + }, 227 + [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { 228 + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", 229 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 230 + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK 231 + | RXE_END_MASK, 232 + .length = RXE_BTH_BYTES + 
RXE_IMMDT_BYTES, 233 + .offset = { 234 + [RXE_BTH] = 0, 235 + [RXE_IMMDT] = RXE_BTH_BYTES, 236 + [RXE_PAYLOAD] = RXE_BTH_BYTES 237 + + RXE_IMMDT_BYTES, 238 + } 239 + }, 240 + [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { 241 + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", 242 + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 243 + | RXE_WRITE_MASK | RXE_START_MASK 244 + | RXE_END_MASK, 245 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, 246 + .offset = { 247 + [RXE_BTH] = 0, 248 + [RXE_RETH] = RXE_BTH_BYTES, 249 + [RXE_PAYLOAD] = RXE_BTH_BYTES 250 + + RXE_RETH_BYTES, 251 + } 252 + }, 253 + [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { 254 + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", 255 + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK 256 + | RXE_REQ_MASK | RXE_WRITE_MASK 257 + | RXE_COMP_MASK | RXE_RWR_MASK 258 + | RXE_START_MASK | RXE_END_MASK, 259 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, 260 + .offset = { 261 + [RXE_BTH] = 0, 262 + [RXE_RETH] = RXE_BTH_BYTES, 263 + [RXE_IMMDT] = RXE_BTH_BYTES 264 + + RXE_RETH_BYTES, 265 + [RXE_PAYLOAD] = RXE_BTH_BYTES 266 + + RXE_RETH_BYTES 267 + + RXE_IMMDT_BYTES, 268 + } 269 + }, 270 + [IB_OPCODE_RC_RDMA_READ_REQUEST] = { 271 + .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", 272 + .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK 273 + | RXE_START_MASK | RXE_END_MASK, 274 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, 275 + .offset = { 276 + [RXE_BTH] = 0, 277 + [RXE_RETH] = RXE_BTH_BYTES, 278 + [RXE_PAYLOAD] = RXE_BTH_BYTES 279 + + RXE_RETH_BYTES, 280 + } 281 + }, 282 + [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { 283 + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", 284 + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK 285 + | RXE_START_MASK, 286 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, 287 + .offset = { 288 + [RXE_BTH] = 0, 289 + [RXE_AETH] = RXE_BTH_BYTES, 290 + [RXE_PAYLOAD] = RXE_BTH_BYTES 291 + + RXE_AETH_BYTES, 292 + } 293 + }, 294 + [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { 
295 + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE", 296 + .mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK, 297 + .length = RXE_BTH_BYTES, 298 + .offset = { 299 + [RXE_BTH] = 0, 300 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 301 + } 302 + }, 303 + [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { 304 + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", 305 + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK 306 + | RXE_END_MASK, 307 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, 308 + .offset = { 309 + [RXE_BTH] = 0, 310 + [RXE_AETH] = RXE_BTH_BYTES, 311 + [RXE_PAYLOAD] = RXE_BTH_BYTES 312 + + RXE_AETH_BYTES, 313 + } 314 + }, 315 + [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { 316 + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", 317 + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK 318 + | RXE_START_MASK | RXE_END_MASK, 319 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, 320 + .offset = { 321 + [RXE_BTH] = 0, 322 + [RXE_AETH] = RXE_BTH_BYTES, 323 + [RXE_PAYLOAD] = RXE_BTH_BYTES 324 + + RXE_AETH_BYTES, 325 + } 326 + }, 327 + [IB_OPCODE_RC_ACKNOWLEDGE] = { 328 + .name = "IB_OPCODE_RC_ACKNOWLEDGE", 329 + .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK 330 + | RXE_END_MASK, 331 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, 332 + .offset = { 333 + [RXE_BTH] = 0, 334 + [RXE_AETH] = RXE_BTH_BYTES, 335 + [RXE_PAYLOAD] = RXE_BTH_BYTES 336 + + RXE_AETH_BYTES, 337 + } 338 + }, 339 + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { 340 + .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", 341 + .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK 342 + | RXE_START_MASK | RXE_END_MASK, 343 + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES, 344 + .offset = { 345 + [RXE_BTH] = 0, 346 + [RXE_AETH] = RXE_BTH_BYTES, 347 + [RXE_ATMACK] = RXE_BTH_BYTES 348 + + RXE_AETH_BYTES, 349 + [RXE_PAYLOAD] = RXE_BTH_BYTES 350 + + RXE_ATMACK_BYTES + RXE_AETH_BYTES, 351 + } 352 + }, 353 + [IB_OPCODE_RC_COMPARE_SWAP] = { 354 + .name = "IB_OPCODE_RC_COMPARE_SWAP", 355 + .mask = RXE_ATMETH_MASK | 
RXE_REQ_MASK | RXE_ATOMIC_MASK 356 + | RXE_START_MASK | RXE_END_MASK, 357 + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, 358 + .offset = { 359 + [RXE_BTH] = 0, 360 + [RXE_ATMETH] = RXE_BTH_BYTES, 361 + [RXE_PAYLOAD] = RXE_BTH_BYTES 362 + + RXE_ATMETH_BYTES, 363 + } 364 + }, 365 + [IB_OPCODE_RC_FETCH_ADD] = { 366 + .name = "IB_OPCODE_RC_FETCH_ADD", 367 + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK 368 + | RXE_START_MASK | RXE_END_MASK, 369 + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, 370 + .offset = { 371 + [RXE_BTH] = 0, 372 + [RXE_ATMETH] = RXE_BTH_BYTES, 373 + [RXE_PAYLOAD] = RXE_BTH_BYTES 374 + + RXE_ATMETH_BYTES, 375 + } 376 + }, 377 + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { 378 + .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", 379 + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 380 + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, 381 + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, 382 + .offset = { 383 + [RXE_BTH] = 0, 384 + [RXE_IETH] = RXE_BTH_BYTES, 385 + [RXE_PAYLOAD] = RXE_BTH_BYTES 386 + + RXE_IETH_BYTES, 387 + } 388 + }, 389 + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { 390 + .name = "IB_OPCODE_RC_SEND_ONLY_INV", 391 + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 392 + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK 393 + | RXE_END_MASK, 394 + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, 395 + .offset = { 396 + [RXE_BTH] = 0, 397 + [RXE_IETH] = RXE_BTH_BYTES, 398 + [RXE_PAYLOAD] = RXE_BTH_BYTES 399 + + RXE_IETH_BYTES, 400 + } 401 + }, 402 + 403 + /* UC */ 404 + [IB_OPCODE_UC_SEND_FIRST] = { 405 + .name = "IB_OPCODE_UC_SEND_FIRST", 406 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK 407 + | RXE_SEND_MASK | RXE_START_MASK, 408 + .length = RXE_BTH_BYTES, 409 + .offset = { 410 + [RXE_BTH] = 0, 411 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 412 + } 413 + }, 414 + [IB_OPCODE_UC_SEND_MIDDLE] = { 415 + .name = "IB_OPCODE_UC_SEND_MIDDLE", 416 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK 417 + | RXE_MIDDLE_MASK, 418 
+ .length = RXE_BTH_BYTES, 419 + .offset = { 420 + [RXE_BTH] = 0, 421 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 422 + } 423 + }, 424 + [IB_OPCODE_UC_SEND_LAST] = { 425 + .name = "IB_OPCODE_UC_SEND_LAST", 426 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK 427 + | RXE_SEND_MASK | RXE_END_MASK, 428 + .length = RXE_BTH_BYTES, 429 + .offset = { 430 + [RXE_BTH] = 0, 431 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 432 + } 433 + }, 434 + [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { 435 + .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", 436 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 437 + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, 438 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, 439 + .offset = { 440 + [RXE_BTH] = 0, 441 + [RXE_IMMDT] = RXE_BTH_BYTES, 442 + [RXE_PAYLOAD] = RXE_BTH_BYTES 443 + + RXE_IMMDT_BYTES, 444 + } 445 + }, 446 + [IB_OPCODE_UC_SEND_ONLY] = { 447 + .name = "IB_OPCODE_UC_SEND_ONLY", 448 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK 449 + | RXE_RWR_MASK | RXE_SEND_MASK 450 + | RXE_START_MASK | RXE_END_MASK, 451 + .length = RXE_BTH_BYTES, 452 + .offset = { 453 + [RXE_BTH] = 0, 454 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 455 + } 456 + }, 457 + [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { 458 + .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", 459 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 460 + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK 461 + | RXE_START_MASK | RXE_END_MASK, 462 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, 463 + .offset = { 464 + [RXE_BTH] = 0, 465 + [RXE_IMMDT] = RXE_BTH_BYTES, 466 + [RXE_PAYLOAD] = RXE_BTH_BYTES 467 + + RXE_IMMDT_BYTES, 468 + } 469 + }, 470 + [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { 471 + .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", 472 + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 473 + | RXE_WRITE_MASK | RXE_START_MASK, 474 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, 475 + .offset = { 476 + [RXE_BTH] = 0, 477 + [RXE_RETH] = RXE_BTH_BYTES, 478 + [RXE_PAYLOAD] = RXE_BTH_BYTES 479 + + 
RXE_RETH_BYTES, 480 + } 481 + }, 482 + [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { 483 + .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", 484 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK 485 + | RXE_MIDDLE_MASK, 486 + .length = RXE_BTH_BYTES, 487 + .offset = { 488 + [RXE_BTH] = 0, 489 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 490 + } 491 + }, 492 + [IB_OPCODE_UC_RDMA_WRITE_LAST] = { 493 + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", 494 + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK 495 + | RXE_END_MASK, 496 + .length = RXE_BTH_BYTES, 497 + .offset = { 498 + [RXE_BTH] = 0, 499 + [RXE_PAYLOAD] = RXE_BTH_BYTES, 500 + } 501 + }, 502 + [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { 503 + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", 504 + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 505 + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK 506 + | RXE_END_MASK, 507 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, 508 + .offset = { 509 + [RXE_BTH] = 0, 510 + [RXE_IMMDT] = RXE_BTH_BYTES, 511 + [RXE_PAYLOAD] = RXE_BTH_BYTES 512 + + RXE_IMMDT_BYTES, 513 + } 514 + }, 515 + [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { 516 + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", 517 + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 518 + | RXE_WRITE_MASK | RXE_START_MASK 519 + | RXE_END_MASK, 520 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, 521 + .offset = { 522 + [RXE_BTH] = 0, 523 + [RXE_RETH] = RXE_BTH_BYTES, 524 + [RXE_PAYLOAD] = RXE_BTH_BYTES 525 + + RXE_RETH_BYTES, 526 + } 527 + }, 528 + [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { 529 + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", 530 + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK 531 + | RXE_REQ_MASK | RXE_WRITE_MASK 532 + | RXE_COMP_MASK | RXE_RWR_MASK 533 + | RXE_START_MASK | RXE_END_MASK, 534 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, 535 + .offset = { 536 + [RXE_BTH] = 0, 537 + [RXE_RETH] = RXE_BTH_BYTES, 538 + [RXE_IMMDT] = RXE_BTH_BYTES 539 + + RXE_RETH_BYTES, 540 + 
[RXE_PAYLOAD] = RXE_BTH_BYTES 541 + + RXE_RETH_BYTES 542 + + RXE_IMMDT_BYTES, 543 + } 544 + }, 545 + 546 + /* RD */ 547 + [IB_OPCODE_RD_SEND_FIRST] = { 548 + .name = "IB_OPCODE_RD_SEND_FIRST", 549 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 550 + | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK 551 + | RXE_START_MASK, 552 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 553 + .offset = { 554 + [RXE_BTH] = 0, 555 + [RXE_RDETH] = RXE_BTH_BYTES, 556 + [RXE_DETH] = RXE_BTH_BYTES 557 + + RXE_RDETH_BYTES, 558 + [RXE_PAYLOAD] = RXE_BTH_BYTES 559 + + RXE_RDETH_BYTES 560 + + RXE_DETH_BYTES, 561 + } 562 + }, 563 + [IB_OPCODE_RD_SEND_MIDDLE] = { 564 + .name = "IB_OPCODE_RD_SEND_MIDDLE", 565 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 566 + | RXE_REQ_MASK | RXE_SEND_MASK 567 + | RXE_MIDDLE_MASK, 568 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 569 + .offset = { 570 + [RXE_BTH] = 0, 571 + [RXE_RDETH] = RXE_BTH_BYTES, 572 + [RXE_DETH] = RXE_BTH_BYTES 573 + + RXE_RDETH_BYTES, 574 + [RXE_PAYLOAD] = RXE_BTH_BYTES 575 + + RXE_RDETH_BYTES 576 + + RXE_DETH_BYTES, 577 + } 578 + }, 579 + [IB_OPCODE_RD_SEND_LAST] = { 580 + .name = "IB_OPCODE_RD_SEND_LAST", 581 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 582 + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK 583 + | RXE_END_MASK, 584 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 585 + .offset = { 586 + [RXE_BTH] = 0, 587 + [RXE_RDETH] = RXE_BTH_BYTES, 588 + [RXE_DETH] = RXE_BTH_BYTES 589 + + RXE_RDETH_BYTES, 590 + [RXE_PAYLOAD] = RXE_BTH_BYTES 591 + + RXE_RDETH_BYTES 592 + + RXE_DETH_BYTES, 593 + } 594 + }, 595 + [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { 596 + .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", 597 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK 598 + | RXE_PAYLOAD_MASK | RXE_REQ_MASK 599 + | RXE_COMP_MASK | RXE_SEND_MASK 600 + | RXE_END_MASK, 601 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES 602 + + 
RXE_RDETH_BYTES, 603 + .offset = { 604 + [RXE_BTH] = 0, 605 + [RXE_RDETH] = RXE_BTH_BYTES, 606 + [RXE_DETH] = RXE_BTH_BYTES 607 + + RXE_RDETH_BYTES, 608 + [RXE_IMMDT] = RXE_BTH_BYTES 609 + + RXE_RDETH_BYTES 610 + + RXE_DETH_BYTES, 611 + [RXE_PAYLOAD] = RXE_BTH_BYTES 612 + + RXE_RDETH_BYTES 613 + + RXE_DETH_BYTES 614 + + RXE_IMMDT_BYTES, 615 + } 616 + }, 617 + [IB_OPCODE_RD_SEND_ONLY] = { 618 + .name = "IB_OPCODE_RD_SEND_ONLY", 619 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 620 + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK 621 + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, 622 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 623 + .offset = { 624 + [RXE_BTH] = 0, 625 + [RXE_RDETH] = RXE_BTH_BYTES, 626 + [RXE_DETH] = RXE_BTH_BYTES 627 + + RXE_RDETH_BYTES, 628 + [RXE_PAYLOAD] = RXE_BTH_BYTES 629 + + RXE_RDETH_BYTES 630 + + RXE_DETH_BYTES, 631 + } 632 + }, 633 + [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { 634 + .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", 635 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK 636 + | RXE_PAYLOAD_MASK | RXE_REQ_MASK 637 + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK 638 + | RXE_START_MASK | RXE_END_MASK, 639 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES 640 + + RXE_RDETH_BYTES, 641 + .offset = { 642 + [RXE_BTH] = 0, 643 + [RXE_RDETH] = RXE_BTH_BYTES, 644 + [RXE_DETH] = RXE_BTH_BYTES 645 + + RXE_RDETH_BYTES, 646 + [RXE_IMMDT] = RXE_BTH_BYTES 647 + + RXE_RDETH_BYTES 648 + + RXE_DETH_BYTES, 649 + [RXE_PAYLOAD] = RXE_BTH_BYTES 650 + + RXE_RDETH_BYTES 651 + + RXE_DETH_BYTES 652 + + RXE_IMMDT_BYTES, 653 + } 654 + }, 655 + [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { 656 + .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", 657 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK 658 + | RXE_PAYLOAD_MASK | RXE_REQ_MASK 659 + | RXE_WRITE_MASK | RXE_START_MASK, 660 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES 661 + + RXE_RDETH_BYTES, 662 + .offset = { 663 + [RXE_BTH] = 0, 664 + 
[RXE_RDETH] = RXE_BTH_BYTES, 665 + [RXE_DETH] = RXE_BTH_BYTES 666 + + RXE_RDETH_BYTES, 667 + [RXE_RETH] = RXE_BTH_BYTES 668 + + RXE_RDETH_BYTES 669 + + RXE_DETH_BYTES, 670 + [RXE_PAYLOAD] = RXE_BTH_BYTES 671 + + RXE_RDETH_BYTES 672 + + RXE_DETH_BYTES 673 + + RXE_RETH_BYTES, 674 + } 675 + }, 676 + [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { 677 + .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", 678 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 679 + | RXE_REQ_MASK | RXE_WRITE_MASK 680 + | RXE_MIDDLE_MASK, 681 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 682 + .offset = { 683 + [RXE_BTH] = 0, 684 + [RXE_RDETH] = RXE_BTH_BYTES, 685 + [RXE_DETH] = RXE_BTH_BYTES 686 + + RXE_RDETH_BYTES, 687 + [RXE_PAYLOAD] = RXE_BTH_BYTES 688 + + RXE_RDETH_BYTES 689 + + RXE_DETH_BYTES, 690 + } 691 + }, 692 + [IB_OPCODE_RD_RDMA_WRITE_LAST] = { 693 + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", 694 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK 695 + | RXE_REQ_MASK | RXE_WRITE_MASK 696 + | RXE_END_MASK, 697 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, 698 + .offset = { 699 + [RXE_BTH] = 0, 700 + [RXE_RDETH] = RXE_BTH_BYTES, 701 + [RXE_DETH] = RXE_BTH_BYTES 702 + + RXE_RDETH_BYTES, 703 + [RXE_PAYLOAD] = RXE_BTH_BYTES 704 + + RXE_RDETH_BYTES 705 + + RXE_DETH_BYTES, 706 + } 707 + }, 708 + [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { 709 + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", 710 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK 711 + | RXE_PAYLOAD_MASK | RXE_REQ_MASK 712 + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK 713 + | RXE_END_MASK, 714 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES 715 + + RXE_RDETH_BYTES, 716 + .offset = { 717 + [RXE_BTH] = 0, 718 + [RXE_RDETH] = RXE_BTH_BYTES, 719 + [RXE_DETH] = RXE_BTH_BYTES 720 + + RXE_RDETH_BYTES, 721 + [RXE_IMMDT] = RXE_BTH_BYTES 722 + + RXE_RDETH_BYTES 723 + + RXE_DETH_BYTES, 724 + [RXE_PAYLOAD] = RXE_BTH_BYTES 725 + + RXE_RDETH_BYTES 726 + + 
RXE_DETH_BYTES 727 + + RXE_IMMDT_BYTES, 728 + } 729 + }, 730 + [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { 731 + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", 732 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK 733 + | RXE_PAYLOAD_MASK | RXE_REQ_MASK 734 + | RXE_WRITE_MASK | RXE_START_MASK 735 + | RXE_END_MASK, 736 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES 737 + + RXE_RDETH_BYTES, 738 + .offset = { 739 + [RXE_BTH] = 0, 740 + [RXE_RDETH] = RXE_BTH_BYTES, 741 + [RXE_DETH] = RXE_BTH_BYTES 742 + + RXE_RDETH_BYTES, 743 + [RXE_RETH] = RXE_BTH_BYTES 744 + + RXE_RDETH_BYTES 745 + + RXE_DETH_BYTES, 746 + [RXE_PAYLOAD] = RXE_BTH_BYTES 747 + + RXE_RDETH_BYTES 748 + + RXE_DETH_BYTES 749 + + RXE_RETH_BYTES, 750 + } 751 + }, 752 + [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { 753 + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", 754 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK 755 + | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK 756 + | RXE_REQ_MASK | RXE_WRITE_MASK 757 + | RXE_COMP_MASK | RXE_RWR_MASK 758 + | RXE_START_MASK | RXE_END_MASK, 759 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES 760 + + RXE_DETH_BYTES + RXE_RDETH_BYTES, 761 + .offset = { 762 + [RXE_BTH] = 0, 763 + [RXE_RDETH] = RXE_BTH_BYTES, 764 + [RXE_DETH] = RXE_BTH_BYTES 765 + + RXE_RDETH_BYTES, 766 + [RXE_RETH] = RXE_BTH_BYTES 767 + + RXE_RDETH_BYTES 768 + + RXE_DETH_BYTES, 769 + [RXE_IMMDT] = RXE_BTH_BYTES 770 + + RXE_RDETH_BYTES 771 + + RXE_DETH_BYTES 772 + + RXE_RETH_BYTES, 773 + [RXE_PAYLOAD] = RXE_BTH_BYTES 774 + + RXE_RDETH_BYTES 775 + + RXE_DETH_BYTES 776 + + RXE_RETH_BYTES 777 + + RXE_IMMDT_BYTES, 778 + } 779 + }, 780 + [IB_OPCODE_RD_RDMA_READ_REQUEST] = { 781 + .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", 782 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK 783 + | RXE_REQ_MASK | RXE_READ_MASK 784 + | RXE_START_MASK | RXE_END_MASK, 785 + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES 786 + + RXE_RDETH_BYTES, 787 + .offset = { 788 + [RXE_BTH] = 0, 789 
+ [RXE_RDETH] = RXE_BTH_BYTES, 790 + [RXE_DETH] = RXE_BTH_BYTES 791 + + RXE_RDETH_BYTES, 792 + [RXE_RETH] = RXE_BTH_BYTES 793 + + RXE_RDETH_BYTES 794 + + RXE_DETH_BYTES, 795 + [RXE_PAYLOAD] = RXE_BTH_BYTES 796 + + RXE_RETH_BYTES 797 + + RXE_DETH_BYTES 798 + + RXE_RDETH_BYTES, 799 + } 800 + }, 801 + [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { 802 + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", 803 + .mask = RXE_RDETH_MASK | RXE_AETH_MASK 804 + | RXE_PAYLOAD_MASK | RXE_ACK_MASK 805 + | RXE_START_MASK, 806 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, 807 + .offset = { 808 + [RXE_BTH] = 0, 809 + [RXE_RDETH] = RXE_BTH_BYTES, 810 + [RXE_AETH] = RXE_BTH_BYTES 811 + + RXE_RDETH_BYTES, 812 + [RXE_PAYLOAD] = RXE_BTH_BYTES 813 + + RXE_RDETH_BYTES 814 + + RXE_AETH_BYTES, 815 + } 816 + }, 817 + [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { 818 + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", 819 + .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK 820 + | RXE_MIDDLE_MASK, 821 + .length = RXE_BTH_BYTES + RXE_RDETH_BYTES, 822 + .offset = { 823 + [RXE_BTH] = 0, 824 + [RXE_RDETH] = RXE_BTH_BYTES, 825 + [RXE_PAYLOAD] = RXE_BTH_BYTES 826 + + RXE_RDETH_BYTES, 827 + } 828 + }, 829 + [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { 830 + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", 831 + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK 832 + | RXE_ACK_MASK | RXE_END_MASK, 833 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, 834 + .offset = { 835 + [RXE_BTH] = 0, 836 + [RXE_RDETH] = RXE_BTH_BYTES, 837 + [RXE_AETH] = RXE_BTH_BYTES 838 + + RXE_RDETH_BYTES, 839 + [RXE_PAYLOAD] = RXE_BTH_BYTES 840 + + RXE_RDETH_BYTES 841 + + RXE_AETH_BYTES, 842 + } 843 + }, 844 + [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { 845 + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", 846 + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK 847 + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, 848 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, 849 + 
.offset = { 850 + [RXE_BTH] = 0, 851 + [RXE_RDETH] = RXE_BTH_BYTES, 852 + [RXE_AETH] = RXE_BTH_BYTES 853 + + RXE_RDETH_BYTES, 854 + [RXE_PAYLOAD] = RXE_BTH_BYTES 855 + + RXE_RDETH_BYTES 856 + + RXE_AETH_BYTES, 857 + } 858 + }, 859 + [IB_OPCODE_RD_ACKNOWLEDGE] = { 860 + .name = "IB_OPCODE_RD_ACKNOWLEDGE", 861 + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK 862 + | RXE_START_MASK | RXE_END_MASK, 863 + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, 864 + .offset = { 865 + [RXE_BTH] = 0, 866 + [RXE_RDETH] = RXE_BTH_BYTES, 867 + [RXE_AETH] = RXE_BTH_BYTES 868 + + RXE_RDETH_BYTES, 869 + } 870 + }, 871 + [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { 872 + .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", 873 + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK 874 + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, 875 + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES 876 + + RXE_RDETH_BYTES, 877 + .offset = { 878 + [RXE_BTH] = 0, 879 + [RXE_RDETH] = RXE_BTH_BYTES, 880 + [RXE_AETH] = RXE_BTH_BYTES 881 + + RXE_RDETH_BYTES, 882 + [RXE_ATMACK] = RXE_BTH_BYTES 883 + + RXE_RDETH_BYTES 884 + + RXE_AETH_BYTES, 885 + } 886 + }, 887 + [IB_OPCODE_RD_COMPARE_SWAP] = { 888 + .name = "RD_COMPARE_SWAP", 889 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK 890 + | RXE_REQ_MASK | RXE_ATOMIC_MASK 891 + | RXE_START_MASK | RXE_END_MASK, 892 + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES 893 + + RXE_RDETH_BYTES, 894 + .offset = { 895 + [RXE_BTH] = 0, 896 + [RXE_RDETH] = RXE_BTH_BYTES, 897 + [RXE_DETH] = RXE_BTH_BYTES 898 + + RXE_RDETH_BYTES, 899 + [RXE_ATMETH] = RXE_BTH_BYTES 900 + + RXE_RDETH_BYTES 901 + + RXE_DETH_BYTES, 902 + [RXE_PAYLOAD] = RXE_BTH_BYTES + 903 + + RXE_ATMETH_BYTES 904 + + RXE_DETH_BYTES + 905 + + RXE_RDETH_BYTES, 906 + } 907 + }, 908 + [IB_OPCODE_RD_FETCH_ADD] = { 909 + .name = "IB_OPCODE_RD_FETCH_ADD", 910 + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK 911 + | RXE_REQ_MASK | RXE_ATOMIC_MASK 912 + | 
RXE_START_MASK | RXE_END_MASK, 913 + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES 914 + + RXE_RDETH_BYTES, 915 + .offset = { 916 + [RXE_BTH] = 0, 917 + [RXE_RDETH] = RXE_BTH_BYTES, 918 + [RXE_DETH] = RXE_BTH_BYTES 919 + + RXE_RDETH_BYTES, 920 + [RXE_ATMETH] = RXE_BTH_BYTES 921 + + RXE_RDETH_BYTES 922 + + RXE_DETH_BYTES, 923 + [RXE_PAYLOAD] = RXE_BTH_BYTES + 924 + + RXE_ATMETH_BYTES 925 + + RXE_DETH_BYTES + 926 + + RXE_RDETH_BYTES, 927 + } 928 + }, 929 + 930 + /* UD */ 931 + [IB_OPCODE_UD_SEND_ONLY] = { 932 + .name = "IB_OPCODE_UD_SEND_ONLY", 933 + .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK 934 + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK 935 + | RXE_START_MASK | RXE_END_MASK, 936 + .length = RXE_BTH_BYTES + RXE_DETH_BYTES, 937 + .offset = { 938 + [RXE_BTH] = 0, 939 + [RXE_DETH] = RXE_BTH_BYTES, 940 + [RXE_PAYLOAD] = RXE_BTH_BYTES 941 + + RXE_DETH_BYTES, 942 + } 943 + }, 944 + [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { 945 + .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", 946 + .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK 947 + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK 948 + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, 949 + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES, 950 + .offset = { 951 + [RXE_BTH] = 0, 952 + [RXE_DETH] = RXE_BTH_BYTES, 953 + [RXE_IMMDT] = RXE_BTH_BYTES 954 + + RXE_DETH_BYTES, 955 + [RXE_PAYLOAD] = RXE_BTH_BYTES 956 + + RXE_DETH_BYTES 957 + + RXE_IMMDT_BYTES, 958 + } 959 + }, 960 + 961 + };
+129
drivers/infiniband/sw/rxe/rxe_opcode.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #ifndef RXE_OPCODE_H 35 + #define RXE_OPCODE_H 36 + 37 + /* 38 + * contains header bit mask definitions and header lengths 39 + * declaration of the rxe_opcode_info struct and 40 + * rxe_wr_opcode_info struct 41 + */ 42 + 43 + enum rxe_wr_mask { 44 + WR_INLINE_MASK = BIT(0), 45 + WR_ATOMIC_MASK = BIT(1), 46 + WR_SEND_MASK = BIT(2), 47 + WR_READ_MASK = BIT(3), 48 + WR_WRITE_MASK = BIT(4), 49 + WR_LOCAL_MASK = BIT(5), 50 + WR_REG_MASK = BIT(6), 51 + 52 + WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, 53 + WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, 54 + WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, 55 + WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, 56 + }; 57 + 58 + #define WR_MAX_QPT (8) 59 + 60 + struct rxe_wr_opcode_info { 61 + char *name; 62 + enum rxe_wr_mask mask[WR_MAX_QPT]; 63 + }; 64 + 65 + extern struct rxe_wr_opcode_info rxe_wr_opcode_info[]; 66 + 67 + enum rxe_hdr_type { 68 + RXE_LRH, 69 + RXE_GRH, 70 + RXE_BTH, 71 + RXE_RETH, 72 + RXE_AETH, 73 + RXE_ATMETH, 74 + RXE_ATMACK, 75 + RXE_IETH, 76 + RXE_RDETH, 77 + RXE_DETH, 78 + RXE_IMMDT, 79 + RXE_PAYLOAD, 80 + NUM_HDR_TYPES 81 + }; 82 + 83 + enum rxe_hdr_mask { 84 + RXE_LRH_MASK = BIT(RXE_LRH), 85 + RXE_GRH_MASK = BIT(RXE_GRH), 86 + RXE_BTH_MASK = BIT(RXE_BTH), 87 + RXE_IMMDT_MASK = BIT(RXE_IMMDT), 88 + RXE_RETH_MASK = BIT(RXE_RETH), 89 + RXE_AETH_MASK = BIT(RXE_AETH), 90 + RXE_ATMETH_MASK = BIT(RXE_ATMETH), 91 + RXE_ATMACK_MASK = BIT(RXE_ATMACK), 92 + RXE_IETH_MASK = BIT(RXE_IETH), 93 + RXE_RDETH_MASK = BIT(RXE_RDETH), 94 + RXE_DETH_MASK = BIT(RXE_DETH), 95 + RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), 96 + 97 + RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), 98 + RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1), 99 + RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2), 100 + RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), 101 + RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), 102 + RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), 103 + 104 + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), 105 + RXE_COMP_MASK = 
BIT(NUM_HDR_TYPES + 7), 106 + 107 + RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), 108 + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), 109 + RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), 110 + 111 + RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), 112 + 113 + RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), 114 + RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), 115 + }; 116 + 117 + #define OPCODE_NONE (-1) 118 + #define RXE_NUM_OPCODE 256 119 + 120 + struct rxe_opcode_info { 121 + char *name; 122 + enum rxe_hdr_mask mask; 123 + int length; 124 + int offset[NUM_HDR_TYPES]; 125 + }; 126 + 127 + extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE]; 128 + 129 + #endif /* RXE_OPCODE_H */
+172
drivers/infiniband/sw/rxe/rxe_param.h
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef RXE_PARAM_H
#define RXE_PARAM_H

/* Map an integer MTU (in bytes) to the largest IB MTU enum value that
 * fits; returns 0 (no valid enum ib_mtu value) for mtu < 256.
 */
static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
{
	if (mtu < 256)
		return 0;
	else if (mtu < 512)
		return IB_MTU_256;
	else if (mtu < 1024)
		return IB_MTU_512;
	else if (mtu < 2048)
		return IB_MTU_1024;
	else if (mtu < 4096)
		return IB_MTU_2048;
	else
		return IB_MTU_4096;
}

/* Find the IB mtu for a given network MTU. */
static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
{
	/* reserve room for the rxe protocol headers before mapping */
	mtu -= RXE_MAX_HDR_LENGTH;

	return rxe_mtu_int_to_enum(mtu);
}

/* default/initial rxe device parameter settings */
enum rxe_device_param {
	RXE_FW_VER			= 0,
	RXE_MAX_MR_SIZE			= -1ull,
	RXE_PAGE_SIZE_CAP		= 0xfffff000,
	RXE_VENDOR_ID			= 0,
	RXE_VENDOR_PART_ID		= 0,
	RXE_HW_VER			= 0,
	RXE_MAX_QP			= 0x10000,
	RXE_MAX_QP_WR			= 0x4000,
	RXE_MAX_INLINE_DATA		= 400,
	RXE_DEVICE_CAP_FLAGS		= IB_DEVICE_BAD_PKEY_CNTR
					| IB_DEVICE_BAD_QKEY_CNTR
					| IB_DEVICE_AUTO_PATH_MIG
					| IB_DEVICE_CHANGE_PHY_PORT
					| IB_DEVICE_UD_AV_PORT_ENFORCE
					| IB_DEVICE_PORT_ACTIVE_EVENT
					| IB_DEVICE_SYS_IMAGE_GUID
					| IB_DEVICE_RC_RNR_NAK_GEN
					| IB_DEVICE_SRQ_RESIZE
					| IB_DEVICE_MEM_MGT_EXTENSIONS,
	RXE_MAX_SGE			= 32,
	RXE_MAX_SGE_RD			= 32,
	RXE_MAX_CQ			= 16384,
	RXE_MAX_LOG_CQE			= 13,
	RXE_MAX_MR			= 2 * 1024,
	RXE_MAX_PD			= 0x7ffc,
	RXE_MAX_QP_RD_ATOM		= 128,
	RXE_MAX_EE_RD_ATOM		= 0,
	RXE_MAX_RES_RD_ATOM		= 0x3f000,
	RXE_MAX_QP_INIT_RD_ATOM		= 128,
	RXE_MAX_EE_INIT_RD_ATOM		= 0,
	RXE_ATOMIC_CAP			= 1,
	RXE_MAX_EE			= 0,
	RXE_MAX_RDD			= 0,
	RXE_MAX_MW			= 0,
	RXE_MAX_RAW_IPV6_QP		= 0,
	RXE_MAX_RAW_ETHY_QP		= 0,
	RXE_MAX_MCAST_GRP		= 8192,
	RXE_MAX_MCAST_QP_ATTACH		= 56,
	RXE_MAX_TOT_MCAST_QP_ATTACH	= 0x70000,
	RXE_MAX_AH			= 100,
	RXE_MAX_FMR			= 0,
	RXE_MAX_MAP_PER_FMR		= 0,
	RXE_MAX_SRQ			= 960,
	RXE_MAX_SRQ_WR			= 0x4000,
	RXE_MIN_SRQ_WR			= 1,
	RXE_MAX_SRQ_SGE			= 27,
	RXE_MIN_SRQ_SGE			= 1,
	RXE_MAX_FMR_PAGE_LIST_LEN	= 512,
	RXE_MAX_PKEYS			= 64,
	RXE_LOCAL_CA_ACK_DELAY		= 15,

	RXE_MAX_UCONTEXT		= 512,

	RXE_NUM_PORT			= 1,
	RXE_NUM_COMP_VECTORS		= 1,

	/* index ranges; note mr and mw share one index space */
	RXE_MIN_QP_INDEX		= 16,
	RXE_MAX_QP_INDEX		= 0x00020000,

	RXE_MIN_SRQ_INDEX		= 0x00020001,
	RXE_MAX_SRQ_INDEX		= 0x00040000,

	RXE_MIN_MR_INDEX		= 0x00000001,
	RXE_MAX_MR_INDEX		= 0x00040000,
	RXE_MIN_MW_INDEX		= 0x00040001,
	RXE_MAX_MW_INDEX		= 0x00060000,
	RXE_MAX_PKT_PER_ACK		= 64,

	RXE_MAX_UNACKED_PSNS		= 128,

	/* Max inflight SKBs per queue pair */
	RXE_INFLIGHT_SKBS_PER_QP_HIGH	= 64,
	RXE_INFLIGHT_SKBS_PER_QP_LOW	= 16,

	/* Delay before calling arbiter timer */
	RXE_NSEC_ARB_TIMER_DELAY	= 200,
};

/* default/initial rxe port parameters */
enum rxe_port_param {
	RXE_PORT_STATE			= IB_PORT_DOWN,
	RXE_PORT_MAX_MTU		= IB_MTU_4096,
	RXE_PORT_ACTIVE_MTU		= IB_MTU_256,
	RXE_PORT_GID_TBL_LEN		= 1024,
	RXE_PORT_PORT_CAP_FLAGS		= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
	RXE_PORT_MAX_MSG_SZ		= 0x800000,
	RXE_PORT_BAD_PKEY_CNTR		= 0,
	RXE_PORT_QKEY_VIOL_CNTR		= 0,
	RXE_PORT_LID			= 0,
	RXE_PORT_SM_LID			= 0,
	RXE_PORT_SM_SL			= 0,
	RXE_PORT_LMC			= 0,
	RXE_PORT_MAX_VL_NUM		= 1,
	RXE_PORT_SUBNET_TIMEOUT		= 0,
	RXE_PORT_INIT_TYPE_REPLY	= 0,
	RXE_PORT_ACTIVE_WIDTH		= IB_WIDTH_1X,
	RXE_PORT_ACTIVE_SPEED		= 1,
	RXE_PORT_PKEY_TBL_LEN		= 64,
	RXE_PORT_PHYS_STATE		= 2,
	RXE_PORT_SUBNET_PREFIX		= 0xfe80000000000000ULL,
};

/* default/initial port info parameters */
enum rxe_port_info_param {
	RXE_PORT_INFO_VL_CAP		= 4,	/* 1-8 */
	RXE_PORT_INFO_MTU_CAP		= 5,	/* 4096 */
	RXE_PORT_INFO_OPER_VL		= 1,	/* 1 */
};

#endif /* RXE_PARAM_H */
+502
drivers/infiniband/sw/rxe/rxe_pool.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + 37 + /* info about object pools 38 + * note that mr and mw share a single index space 39 + * so that one can map an lkey to the correct type of object 40 + */ 41 + struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { 42 + [RXE_TYPE_UC] = { 43 + .name = "rxe-uc", 44 + .size = sizeof(struct rxe_ucontext), 45 + }, 46 + [RXE_TYPE_PD] = { 47 + .name = "rxe-pd", 48 + .size = sizeof(struct rxe_pd), 49 + }, 50 + [RXE_TYPE_AH] = { 51 + .name = "rxe-ah", 52 + .size = sizeof(struct rxe_ah), 53 + .flags = RXE_POOL_ATOMIC, 54 + }, 55 + [RXE_TYPE_SRQ] = { 56 + .name = "rxe-srq", 57 + .size = sizeof(struct rxe_srq), 58 + .flags = RXE_POOL_INDEX, 59 + .min_index = RXE_MIN_SRQ_INDEX, 60 + .max_index = RXE_MAX_SRQ_INDEX, 61 + }, 62 + [RXE_TYPE_QP] = { 63 + .name = "rxe-qp", 64 + .size = sizeof(struct rxe_qp), 65 + .cleanup = rxe_qp_cleanup, 66 + .flags = RXE_POOL_INDEX, 67 + .min_index = RXE_MIN_QP_INDEX, 68 + .max_index = RXE_MAX_QP_INDEX, 69 + }, 70 + [RXE_TYPE_CQ] = { 71 + .name = "rxe-cq", 72 + .size = sizeof(struct rxe_cq), 73 + .cleanup = rxe_cq_cleanup, 74 + }, 75 + [RXE_TYPE_MR] = { 76 + .name = "rxe-mr", 77 + .size = sizeof(struct rxe_mem), 78 + .cleanup = rxe_mem_cleanup, 79 + .flags = RXE_POOL_INDEX, 80 + .max_index = RXE_MAX_MR_INDEX, 81 + .min_index = RXE_MIN_MR_INDEX, 82 + }, 83 + [RXE_TYPE_MW] = { 84 + .name = "rxe-mw", 85 + .size = sizeof(struct rxe_mem), 86 + .flags = RXE_POOL_INDEX, 87 + .max_index = RXE_MAX_MW_INDEX, 88 + .min_index = RXE_MIN_MW_INDEX, 89 + }, 90 + [RXE_TYPE_MC_GRP] = { 91 + .name = "rxe-mc_grp", 92 + .size = sizeof(struct rxe_mc_grp), 93 + .cleanup = rxe_mc_cleanup, 94 + .flags = RXE_POOL_KEY, 95 + .key_offset = offsetof(struct rxe_mc_grp, mgid), 96 + .key_size = sizeof(union ib_gid), 97 + }, 98 + [RXE_TYPE_MC_ELEM] = { 99 + .name = "rxe-mc_elem", 100 + .size = sizeof(struct rxe_mc_elem), 101 + .flags = RXE_POOL_ATOMIC, 102 + }, 103 + }; 104 + 105 + static inline char 
*pool_name(struct rxe_pool *pool) 106 + { 107 + return rxe_type_info[pool->type].name; 108 + } 109 + 110 + static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) 111 + { 112 + return rxe_type_info[pool->type].cache; 113 + } 114 + 115 + static inline enum rxe_elem_type rxe_type(void *arg) 116 + { 117 + struct rxe_pool_entry *elem = arg; 118 + 119 + return elem->pool->type; 120 + } 121 + 122 + int rxe_cache_init(void) 123 + { 124 + int err; 125 + int i; 126 + size_t size; 127 + struct rxe_type_info *type; 128 + 129 + for (i = 0; i < RXE_NUM_TYPES; i++) { 130 + type = &rxe_type_info[i]; 131 + size = ALIGN(type->size, RXE_POOL_ALIGN); 132 + type->cache = kmem_cache_create(type->name, size, 133 + RXE_POOL_ALIGN, 134 + RXE_POOL_CACHE_FLAGS, NULL); 135 + if (!type->cache) { 136 + pr_err("Unable to init kmem cache for %s\n", 137 + type->name); 138 + err = -ENOMEM; 139 + goto err1; 140 + } 141 + } 142 + 143 + return 0; 144 + 145 + err1: 146 + while (--i >= 0) { 147 + kmem_cache_destroy(type->cache); 148 + type->cache = NULL; 149 + } 150 + 151 + return err; 152 + } 153 + 154 + void rxe_cache_exit(void) 155 + { 156 + int i; 157 + struct rxe_type_info *type; 158 + 159 + for (i = 0; i < RXE_NUM_TYPES; i++) { 160 + type = &rxe_type_info[i]; 161 + kmem_cache_destroy(type->cache); 162 + type->cache = NULL; 163 + } 164 + } 165 + 166 + static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) 167 + { 168 + int err = 0; 169 + size_t size; 170 + 171 + if ((max - min + 1) < pool->max_elem) { 172 + pr_warn("not enough indices for max_elem\n"); 173 + err = -EINVAL; 174 + goto out; 175 + } 176 + 177 + pool->max_index = max; 178 + pool->min_index = min; 179 + 180 + size = BITS_TO_LONGS(max - min + 1) * sizeof(long); 181 + pool->table = kmalloc(size, GFP_KERNEL); 182 + if (!pool->table) { 183 + pr_warn("no memory for bit table\n"); 184 + err = -ENOMEM; 185 + goto out; 186 + } 187 + 188 + pool->table_size = size; 189 + bitmap_zero(pool->table, max - min + 1); 190 + 
191 + out: 192 + return err; 193 + } 194 + 195 + int rxe_pool_init( 196 + struct rxe_dev *rxe, 197 + struct rxe_pool *pool, 198 + enum rxe_elem_type type, 199 + unsigned max_elem) 200 + { 201 + int err = 0; 202 + size_t size = rxe_type_info[type].size; 203 + 204 + memset(pool, 0, sizeof(*pool)); 205 + 206 + pool->rxe = rxe; 207 + pool->type = type; 208 + pool->max_elem = max_elem; 209 + pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); 210 + pool->flags = rxe_type_info[type].flags; 211 + pool->tree = RB_ROOT; 212 + pool->cleanup = rxe_type_info[type].cleanup; 213 + 214 + atomic_set(&pool->num_elem, 0); 215 + 216 + kref_init(&pool->ref_cnt); 217 + 218 + spin_lock_init(&pool->pool_lock); 219 + 220 + if (rxe_type_info[type].flags & RXE_POOL_INDEX) { 221 + err = rxe_pool_init_index(pool, 222 + rxe_type_info[type].max_index, 223 + rxe_type_info[type].min_index); 224 + if (err) 225 + goto out; 226 + } 227 + 228 + if (rxe_type_info[type].flags & RXE_POOL_KEY) { 229 + pool->key_offset = rxe_type_info[type].key_offset; 230 + pool->key_size = rxe_type_info[type].key_size; 231 + } 232 + 233 + pool->state = rxe_pool_valid; 234 + 235 + out: 236 + return err; 237 + } 238 + 239 + static void rxe_pool_release(struct kref *kref) 240 + { 241 + struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); 242 + 243 + pool->state = rxe_pool_invalid; 244 + kfree(pool->table); 245 + } 246 + 247 + static void rxe_pool_put(struct rxe_pool *pool) 248 + { 249 + kref_put(&pool->ref_cnt, rxe_pool_release); 250 + } 251 + 252 + int rxe_pool_cleanup(struct rxe_pool *pool) 253 + { 254 + unsigned long flags; 255 + 256 + spin_lock_irqsave(&pool->pool_lock, flags); 257 + pool->state = rxe_pool_invalid; 258 + if (atomic_read(&pool->num_elem) > 0) 259 + pr_warn("%s pool destroyed with unfree'd elem\n", 260 + pool_name(pool)); 261 + spin_unlock_irqrestore(&pool->pool_lock, flags); 262 + 263 + rxe_pool_put(pool); 264 + 265 + return 0; 266 + } 267 + 268 + static u32 alloc_index(struct rxe_pool 
*pool) 269 + { 270 + u32 index; 271 + u32 range = pool->max_index - pool->min_index + 1; 272 + 273 + index = find_next_zero_bit(pool->table, range, pool->last); 274 + if (index >= range) 275 + index = find_first_zero_bit(pool->table, range); 276 + 277 + set_bit(index, pool->table); 278 + pool->last = index; 279 + return index + pool->min_index; 280 + } 281 + 282 + static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) 283 + { 284 + struct rb_node **link = &pool->tree.rb_node; 285 + struct rb_node *parent = NULL; 286 + struct rxe_pool_entry *elem; 287 + 288 + while (*link) { 289 + parent = *link; 290 + elem = rb_entry(parent, struct rxe_pool_entry, node); 291 + 292 + if (elem->index == new->index) { 293 + pr_warn("element already exists!\n"); 294 + goto out; 295 + } 296 + 297 + if (elem->index > new->index) 298 + link = &(*link)->rb_left; 299 + else 300 + link = &(*link)->rb_right; 301 + } 302 + 303 + rb_link_node(&new->node, parent, link); 304 + rb_insert_color(&new->node, &pool->tree); 305 + out: 306 + return; 307 + } 308 + 309 + static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) 310 + { 311 + struct rb_node **link = &pool->tree.rb_node; 312 + struct rb_node *parent = NULL; 313 + struct rxe_pool_entry *elem; 314 + int cmp; 315 + 316 + while (*link) { 317 + parent = *link; 318 + elem = rb_entry(parent, struct rxe_pool_entry, node); 319 + 320 + cmp = memcmp((u8 *)elem + pool->key_offset, 321 + (u8 *)new + pool->key_offset, pool->key_size); 322 + 323 + if (cmp == 0) { 324 + pr_warn("key already exists!\n"); 325 + goto out; 326 + } 327 + 328 + if (cmp > 0) 329 + link = &(*link)->rb_left; 330 + else 331 + link = &(*link)->rb_right; 332 + } 333 + 334 + rb_link_node(&new->node, parent, link); 335 + rb_insert_color(&new->node, &pool->tree); 336 + out: 337 + return; 338 + } 339 + 340 + void rxe_add_key(void *arg, void *key) 341 + { 342 + struct rxe_pool_entry *elem = arg; 343 + struct rxe_pool *pool = elem->pool; 344 + unsigned 
long flags; 345 + 346 + spin_lock_irqsave(&pool->pool_lock, flags); 347 + memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); 348 + insert_key(pool, elem); 349 + spin_unlock_irqrestore(&pool->pool_lock, flags); 350 + } 351 + 352 + void rxe_drop_key(void *arg) 353 + { 354 + struct rxe_pool_entry *elem = arg; 355 + struct rxe_pool *pool = elem->pool; 356 + unsigned long flags; 357 + 358 + spin_lock_irqsave(&pool->pool_lock, flags); 359 + rb_erase(&elem->node, &pool->tree); 360 + spin_unlock_irqrestore(&pool->pool_lock, flags); 361 + } 362 + 363 + void rxe_add_index(void *arg) 364 + { 365 + struct rxe_pool_entry *elem = arg; 366 + struct rxe_pool *pool = elem->pool; 367 + unsigned long flags; 368 + 369 + spin_lock_irqsave(&pool->pool_lock, flags); 370 + elem->index = alloc_index(pool); 371 + insert_index(pool, elem); 372 + spin_unlock_irqrestore(&pool->pool_lock, flags); 373 + } 374 + 375 + void rxe_drop_index(void *arg) 376 + { 377 + struct rxe_pool_entry *elem = arg; 378 + struct rxe_pool *pool = elem->pool; 379 + unsigned long flags; 380 + 381 + spin_lock_irqsave(&pool->pool_lock, flags); 382 + clear_bit(elem->index - pool->min_index, pool->table); 383 + rb_erase(&elem->node, &pool->tree); 384 + spin_unlock_irqrestore(&pool->pool_lock, flags); 385 + } 386 + 387 + void *rxe_alloc(struct rxe_pool *pool) 388 + { 389 + struct rxe_pool_entry *elem; 390 + unsigned long flags; 391 + 392 + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); 393 + 394 + spin_lock_irqsave(&pool->pool_lock, flags); 395 + if (pool->state != rxe_pool_valid) { 396 + spin_unlock_irqrestore(&pool->pool_lock, flags); 397 + return NULL; 398 + } 399 + kref_get(&pool->ref_cnt); 400 + spin_unlock_irqrestore(&pool->pool_lock, flags); 401 + 402 + kref_get(&pool->rxe->ref_cnt); 403 + 404 + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { 405 + atomic_dec(&pool->num_elem); 406 + rxe_dev_put(pool->rxe); 407 + rxe_pool_put(pool); 408 + return NULL; 409 + } 410 + 411 + elem = 
kmem_cache_zalloc(pool_cache(pool), 412 + (pool->flags & RXE_POOL_ATOMIC) ? 413 + GFP_ATOMIC : GFP_KERNEL); 414 + 415 + elem->pool = pool; 416 + kref_init(&elem->ref_cnt); 417 + 418 + return elem; 419 + } 420 + 421 + void rxe_elem_release(struct kref *kref) 422 + { 423 + struct rxe_pool_entry *elem = 424 + container_of(kref, struct rxe_pool_entry, ref_cnt); 425 + struct rxe_pool *pool = elem->pool; 426 + 427 + if (pool->cleanup) 428 + pool->cleanup(elem); 429 + 430 + kmem_cache_free(pool_cache(pool), elem); 431 + atomic_dec(&pool->num_elem); 432 + rxe_dev_put(pool->rxe); 433 + rxe_pool_put(pool); 434 + } 435 + 436 + void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) 437 + { 438 + struct rb_node *node = NULL; 439 + struct rxe_pool_entry *elem = NULL; 440 + unsigned long flags; 441 + 442 + spin_lock_irqsave(&pool->pool_lock, flags); 443 + 444 + if (pool->state != rxe_pool_valid) 445 + goto out; 446 + 447 + node = pool->tree.rb_node; 448 + 449 + while (node) { 450 + elem = rb_entry(node, struct rxe_pool_entry, node); 451 + 452 + if (elem->index > index) 453 + node = node->rb_left; 454 + else if (elem->index < index) 455 + node = node->rb_right; 456 + else 457 + break; 458 + } 459 + 460 + if (node) 461 + kref_get(&elem->ref_cnt); 462 + 463 + out: 464 + spin_unlock_irqrestore(&pool->pool_lock, flags); 465 + return node ? 
(void *)elem : NULL; 466 + } 467 + 468 + void *rxe_pool_get_key(struct rxe_pool *pool, void *key) 469 + { 470 + struct rb_node *node = NULL; 471 + struct rxe_pool_entry *elem = NULL; 472 + int cmp; 473 + unsigned long flags; 474 + 475 + spin_lock_irqsave(&pool->pool_lock, flags); 476 + 477 + if (pool->state != rxe_pool_valid) 478 + goto out; 479 + 480 + node = pool->tree.rb_node; 481 + 482 + while (node) { 483 + elem = rb_entry(node, struct rxe_pool_entry, node); 484 + 485 + cmp = memcmp((u8 *)elem + pool->key_offset, 486 + key, pool->key_size); 487 + 488 + if (cmp > 0) 489 + node = node->rb_left; 490 + else if (cmp < 0) 491 + node = node->rb_right; 492 + else 493 + break; 494 + } 495 + 496 + if (node) 497 + kref_get(&elem->ref_cnt); 498 + 499 + out: 500 + spin_unlock_irqrestore(&pool->pool_lock, flags); 501 + return node ? ((void *)elem) : NULL; 502 + }
+163
drivers/infiniband/sw/rxe/rxe_pool.h
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Object pools for the rxe soft-RoCE driver: slab-cache backed pools of
 * reference-counted objects, optionally indexed (rb-tree keyed on a u32
 * index) or keyed (rb-tree keyed on an arbitrary byte string).
 */

#ifndef RXE_POOL_H
#define RXE_POOL_H

#define RXE_POOL_ALIGN		(16)
#define RXE_POOL_CACHE_FLAGS	(0)

enum rxe_pool_flags {
	RXE_POOL_ATOMIC		= BIT(0),	/* allocate with GFP_ATOMIC */
	RXE_POOL_INDEX		= BIT(1),	/* objects carry a u32 index */
	RXE_POOL_KEY		= BIT(2),	/* objects carry a byte-string key */
};

/* one pool exists per object type */
enum rxe_elem_type {
	RXE_TYPE_UC,
	RXE_TYPE_PD,
	RXE_TYPE_AH,
	RXE_TYPE_SRQ,
	RXE_TYPE_QP,
	RXE_TYPE_CQ,
	RXE_TYPE_MR,
	RXE_TYPE_MW,
	RXE_TYPE_MC_GRP,
	RXE_TYPE_MC_ELEM,
	RXE_NUM_TYPES,		/* keep me last */
};

/* static per-type parameters used by rxe_pool_init() */
struct rxe_type_info {
	char			*name;
	size_t			size;		/* object size in bytes */
	void			(*cleanup)(void *obj);	/* called on last ref drop */
	enum rxe_pool_flags	flags;
	u32			max_index;	/* index range (if RXE_POOL_INDEX) */
	u32			min_index;
	size_t			key_offset;	/* key location (if RXE_POOL_KEY) */
	size_t			key_size;
	struct kmem_cache	*cache;		/* backing slab cache */
};

extern struct rxe_type_info rxe_type_info[];

enum rxe_pool_state {
	rxe_pool_invalid,
	rxe_pool_valid,
};

/* embedded at the start of every pooled object */
struct rxe_pool_entry {
	struct rxe_pool		*pool;
	struct kref		ref_cnt;
	struct list_head	list;

	/* only used if indexed or keyed */
	struct rb_node		node;
	u32			index;
};

struct rxe_pool {
	struct rxe_dev		*rxe;
	spinlock_t              pool_lock; /* pool spinlock */
	size_t			elem_size;
	struct kref		ref_cnt;
	void			(*cleanup)(void *obj);
	enum rxe_pool_state	state;
	enum rxe_pool_flags	flags;
	enum rxe_elem_type	type;

	unsigned int		max_elem;	/* hard cap on pool population */
	atomic_t		num_elem;

	/* only used if indexed or keyed */
	struct rb_root		tree;		/* lookup tree, under pool_lock */
	unsigned long		*table;		/* bitmap of in-use indices */
	size_t			table_size;
	u32			max_index;
	u32			min_index;
	u32			last;		/* hint for next index search */
	size_t			key_offset;
	size_t			key_size;
};

/* initialize slab caches for managed objects */
int rxe_cache_init(void);

/* cleanup slab caches for managed objects */
void rxe_cache_exit(void);

/* initialize a pool of objects with given limit on
 * number of elements. gets parameters from rxe_type_info
 * pool elements will be allocated out of a slab cache
 */
int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
		  enum rxe_elem_type type, u32 max_elem);

/* free resources from object pool */
int rxe_pool_cleanup(struct rxe_pool *pool);

/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);

/* assign an index to an indexed object and insert object into
 * pool's rb tree
 */
void rxe_add_index(void *elem);

/* drop an index and remove object from rb tree */
void rxe_drop_index(void *elem);

/* assign a key to a keyed object and insert object into
 * pool's rb tree
 */
void rxe_add_key(void *elem, void *key);

/* remove elem from rb tree */
void rxe_drop_key(void *elem);

/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);

/* lookup keyed object from key. takes a reference on the object */
void *rxe_pool_get_key(struct rxe_pool *pool, void *key);

/* cleanup an object when all references are dropped */
void rxe_elem_release(struct kref *kref);

/* take a reference on an object */
#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)

/* drop a reference on an object */
#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)

#endif /* RXE_POOL_H */
+851
drivers/infiniband/sw/rxe/rxe_qp.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/skbuff.h> 35 + #include <linux/delay.h> 36 + #include <linux/sched.h> 37 + 38 + #include "rxe.h" 39 + #include "rxe_loc.h" 40 + #include "rxe_queue.h" 41 + #include "rxe_task.h" 42 + 43 + char *rxe_qp_state_name[] = { 44 + [QP_STATE_RESET] = "RESET", 45 + [QP_STATE_INIT] = "INIT", 46 + [QP_STATE_READY] = "READY", 47 + [QP_STATE_DRAIN] = "DRAIN", 48 + [QP_STATE_DRAINED] = "DRAINED", 49 + [QP_STATE_ERROR] = "ERROR", 50 + }; 51 + 52 + static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, 53 + int has_srq) 54 + { 55 + if (cap->max_send_wr > rxe->attr.max_qp_wr) { 56 + pr_warn("invalid send wr = %d > %d\n", 57 + cap->max_send_wr, rxe->attr.max_qp_wr); 58 + goto err1; 59 + } 60 + 61 + if (cap->max_send_sge > rxe->attr.max_sge) { 62 + pr_warn("invalid send sge = %d > %d\n", 63 + cap->max_send_sge, rxe->attr.max_sge); 64 + goto err1; 65 + } 66 + 67 + if (!has_srq) { 68 + if (cap->max_recv_wr > rxe->attr.max_qp_wr) { 69 + pr_warn("invalid recv wr = %d > %d\n", 70 + cap->max_recv_wr, rxe->attr.max_qp_wr); 71 + goto err1; 72 + } 73 + 74 + if (cap->max_recv_sge > rxe->attr.max_sge) { 75 + pr_warn("invalid recv sge = %d > %d\n", 76 + cap->max_recv_sge, rxe->attr.max_sge); 77 + goto err1; 78 + } 79 + } 80 + 81 + if (cap->max_inline_data > rxe->max_inline_data) { 82 + pr_warn("invalid max inline data = %d > %d\n", 83 + cap->max_inline_data, rxe->max_inline_data); 84 + goto err1; 85 + } 86 + 87 + return 0; 88 + 89 + err1: 90 + return -EINVAL; 91 + } 92 + 93 + int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) 94 + { 95 + struct ib_qp_cap *cap = &init->cap; 96 + struct rxe_port *port; 97 + int port_num = init->port_num; 98 + 99 + if (!init->recv_cq || !init->send_cq) { 100 + pr_warn("missing cq\n"); 101 + goto err1; 102 + } 103 + 104 + if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) 105 + goto err1; 106 + 107 + if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { 108 + if (port_num != 1) { 109 + 
pr_warn("invalid port = %d\n", port_num); 110 + goto err1; 111 + } 112 + 113 + port = &rxe->port; 114 + 115 + if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { 116 + pr_warn("SMI QP exists for port %d\n", port_num); 117 + goto err1; 118 + } 119 + 120 + if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { 121 + pr_warn("GSI QP exists for port %d\n", port_num); 122 + goto err1; 123 + } 124 + } 125 + 126 + return 0; 127 + 128 + err1: 129 + return -EINVAL; 130 + } 131 + 132 + static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n) 133 + { 134 + qp->resp.res_head = 0; 135 + qp->resp.res_tail = 0; 136 + qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL); 137 + 138 + if (!qp->resp.resources) 139 + return -ENOMEM; 140 + 141 + return 0; 142 + } 143 + 144 + static void free_rd_atomic_resources(struct rxe_qp *qp) 145 + { 146 + if (qp->resp.resources) { 147 + int i; 148 + 149 + for (i = 0; i < qp->attr.max_rd_atomic; i++) { 150 + struct resp_res *res = &qp->resp.resources[i]; 151 + 152 + free_rd_atomic_resource(qp, res); 153 + } 154 + kfree(qp->resp.resources); 155 + qp->resp.resources = NULL; 156 + } 157 + } 158 + 159 + void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) 160 + { 161 + if (res->type == RXE_ATOMIC_MASK) { 162 + rxe_drop_ref(qp); 163 + kfree_skb(res->atomic.skb); 164 + } else if (res->type == RXE_READ_MASK) { 165 + if (res->read.mr) 166 + rxe_drop_ref(res->read.mr); 167 + } 168 + res->type = 0; 169 + } 170 + 171 + static void cleanup_rd_atomic_resources(struct rxe_qp *qp) 172 + { 173 + int i; 174 + struct resp_res *res; 175 + 176 + if (qp->resp.resources) { 177 + for (i = 0; i < qp->attr.max_rd_atomic; i++) { 178 + res = &qp->resp.resources[i]; 179 + free_rd_atomic_resource(qp, res); 180 + } 181 + } 182 + } 183 + 184 + static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, 185 + struct ib_qp_init_attr *init) 186 + { 187 + struct rxe_port *port; 188 + u32 qpn; 189 + 190 + qp->sq_sig_type 
= init->sq_sig_type; 191 + qp->attr.path_mtu = 1; 192 + qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); 193 + 194 + qpn = qp->pelem.index; 195 + port = &rxe->port; 196 + 197 + switch (init->qp_type) { 198 + case IB_QPT_SMI: 199 + qp->ibqp.qp_num = 0; 200 + port->qp_smi_index = qpn; 201 + qp->attr.port_num = init->port_num; 202 + break; 203 + 204 + case IB_QPT_GSI: 205 + qp->ibqp.qp_num = 1; 206 + port->qp_gsi_index = qpn; 207 + qp->attr.port_num = init->port_num; 208 + break; 209 + 210 + default: 211 + qp->ibqp.qp_num = qpn; 212 + break; 213 + } 214 + 215 + INIT_LIST_HEAD(&qp->grp_list); 216 + 217 + skb_queue_head_init(&qp->send_pkts); 218 + 219 + spin_lock_init(&qp->grp_lock); 220 + spin_lock_init(&qp->state_lock); 221 + 222 + atomic_set(&qp->ssn, 0); 223 + atomic_set(&qp->skb_out, 0); 224 + } 225 + 226 + static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, 227 + struct ib_qp_init_attr *init, 228 + struct ib_ucontext *context, struct ib_udata *udata) 229 + { 230 + int err; 231 + int wqe_size; 232 + 233 + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); 234 + if (err < 0) 235 + return err; 236 + qp->sk->sk->sk_user_data = qp; 237 + 238 + qp->sq.max_wr = init->cap.max_send_wr; 239 + qp->sq.max_sge = init->cap.max_send_sge; 240 + qp->sq.max_inline = init->cap.max_inline_data; 241 + 242 + wqe_size = max_t(int, sizeof(struct rxe_send_wqe) + 243 + qp->sq.max_sge * sizeof(struct ib_sge), 244 + sizeof(struct rxe_send_wqe) + 245 + qp->sq.max_inline); 246 + 247 + qp->sq.queue = rxe_queue_init(rxe, 248 + &qp->sq.max_wr, 249 + wqe_size); 250 + if (!qp->sq.queue) 251 + return -ENOMEM; 252 + 253 + err = do_mmap_info(rxe, udata, true, 254 + context, qp->sq.queue->buf, 255 + qp->sq.queue->buf_size, &qp->sq.queue->ip); 256 + 257 + if (err) { 258 + kvfree(qp->sq.queue->buf); 259 + kfree(qp->sq.queue); 260 + return err; 261 + } 262 + 263 + qp->req.wqe_index = producer_index(qp->sq.queue); 264 + qp->req.state = QP_STATE_RESET; 265 + qp->req.opcode = 
-1; 266 + qp->comp.opcode = -1; 267 + 268 + spin_lock_init(&qp->sq.sq_lock); 269 + skb_queue_head_init(&qp->req_pkts); 270 + 271 + rxe_init_task(rxe, &qp->req.task, qp, 272 + rxe_requester, "req"); 273 + rxe_init_task(rxe, &qp->comp.task, qp, 274 + rxe_completer, "comp"); 275 + 276 + init_timer(&qp->rnr_nak_timer); 277 + qp->rnr_nak_timer.function = rnr_nak_timer; 278 + qp->rnr_nak_timer.data = (unsigned long)qp; 279 + 280 + init_timer(&qp->retrans_timer); 281 + qp->retrans_timer.function = retransmit_timer; 282 + qp->retrans_timer.data = (unsigned long)qp; 283 + qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ 284 + 285 + return 0; 286 + } 287 + 288 + static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, 289 + struct ib_qp_init_attr *init, 290 + struct ib_ucontext *context, struct ib_udata *udata) 291 + { 292 + int err; 293 + int wqe_size; 294 + 295 + if (!qp->srq) { 296 + qp->rq.max_wr = init->cap.max_recv_wr; 297 + qp->rq.max_sge = init->cap.max_recv_sge; 298 + 299 + wqe_size = rcv_wqe_size(qp->rq.max_sge); 300 + 301 + pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", 302 + qp->rq.max_wr, qp->rq.max_sge, wqe_size); 303 + 304 + qp->rq.queue = rxe_queue_init(rxe, 305 + &qp->rq.max_wr, 306 + wqe_size); 307 + if (!qp->rq.queue) 308 + return -ENOMEM; 309 + 310 + err = do_mmap_info(rxe, udata, false, context, 311 + qp->rq.queue->buf, 312 + qp->rq.queue->buf_size, 313 + &qp->rq.queue->ip); 314 + if (err) { 315 + kvfree(qp->rq.queue->buf); 316 + kfree(qp->rq.queue); 317 + return err; 318 + } 319 + } 320 + 321 + spin_lock_init(&qp->rq.producer_lock); 322 + spin_lock_init(&qp->rq.consumer_lock); 323 + 324 + skb_queue_head_init(&qp->resp_pkts); 325 + 326 + rxe_init_task(rxe, &qp->resp.task, qp, 327 + rxe_responder, "resp"); 328 + 329 + qp->resp.opcode = OPCODE_NONE; 330 + qp->resp.msn = 0; 331 + qp->resp.state = QP_STATE_RESET; 332 + 333 + return 0; 334 + } 335 + 336 + /* called by the create qp verb */ 337 + int 
rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, 338 + struct ib_qp_init_attr *init, struct ib_udata *udata, 339 + struct ib_pd *ibpd) 340 + { 341 + int err; 342 + struct rxe_cq *rcq = to_rcq(init->recv_cq); 343 + struct rxe_cq *scq = to_rcq(init->send_cq); 344 + struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; 345 + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; 346 + 347 + rxe_add_ref(pd); 348 + rxe_add_ref(rcq); 349 + rxe_add_ref(scq); 350 + if (srq) 351 + rxe_add_ref(srq); 352 + 353 + qp->pd = pd; 354 + qp->rcq = rcq; 355 + qp->scq = scq; 356 + qp->srq = srq; 357 + 358 + rxe_qp_init_misc(rxe, qp, init); 359 + 360 + err = rxe_qp_init_req(rxe, qp, init, context, udata); 361 + if (err) 362 + goto err1; 363 + 364 + err = rxe_qp_init_resp(rxe, qp, init, context, udata); 365 + if (err) 366 + goto err2; 367 + 368 + qp->attr.qp_state = IB_QPS_RESET; 369 + qp->valid = 1; 370 + 371 + return 0; 372 + 373 + err2: 374 + rxe_queue_cleanup(qp->sq.queue); 375 + err1: 376 + if (srq) 377 + rxe_drop_ref(srq); 378 + rxe_drop_ref(scq); 379 + rxe_drop_ref(rcq); 380 + rxe_drop_ref(pd); 381 + 382 + return err; 383 + } 384 + 385 + /* called by the query qp verb */ 386 + int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init) 387 + { 388 + init->event_handler = qp->ibqp.event_handler; 389 + init->qp_context = qp->ibqp.qp_context; 390 + init->send_cq = qp->ibqp.send_cq; 391 + init->recv_cq = qp->ibqp.recv_cq; 392 + init->srq = qp->ibqp.srq; 393 + 394 + init->cap.max_send_wr = qp->sq.max_wr; 395 + init->cap.max_send_sge = qp->sq.max_sge; 396 + init->cap.max_inline_data = qp->sq.max_inline; 397 + 398 + if (!qp->srq) { 399 + init->cap.max_recv_wr = qp->rq.max_wr; 400 + init->cap.max_recv_sge = qp->rq.max_sge; 401 + } 402 + 403 + init->sq_sig_type = qp->sq_sig_type; 404 + 405 + init->qp_type = qp->ibqp.qp_type; 406 + init->port_num = 1; 407 + 408 + return 0; 409 + } 410 + 411 + /* called by the modify qp verb, this 
routine checks all the parameters before 412 + * making any changes 413 + */ 414 + int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, 415 + struct ib_qp_attr *attr, int mask) 416 + { 417 + enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? 418 + attr->cur_qp_state : qp->attr.qp_state; 419 + enum ib_qp_state new_state = (mask & IB_QP_STATE) ? 420 + attr->qp_state : cur_state; 421 + 422 + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, 423 + IB_LINK_LAYER_ETHERNET)) { 424 + pr_warn("invalid mask or state for qp\n"); 425 + goto err1; 426 + } 427 + 428 + if (mask & IB_QP_STATE) { 429 + if (cur_state == IB_QPS_SQD) { 430 + if (qp->req.state == QP_STATE_DRAIN && 431 + new_state != IB_QPS_ERR) 432 + goto err1; 433 + } 434 + } 435 + 436 + if (mask & IB_QP_PORT) { 437 + if (attr->port_num != 1) { 438 + pr_warn("invalid port %d\n", attr->port_num); 439 + goto err1; 440 + } 441 + } 442 + 443 + if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) 444 + goto err1; 445 + 446 + if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) 447 + goto err1; 448 + 449 + if (mask & IB_QP_ALT_PATH) { 450 + if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) 451 + goto err1; 452 + if (attr->alt_port_num != 1) { 453 + pr_warn("invalid alt port %d\n", attr->alt_port_num); 454 + goto err1; 455 + } 456 + if (attr->alt_timeout > 31) { 457 + pr_warn("invalid QP alt timeout %d > 31\n", 458 + attr->alt_timeout); 459 + goto err1; 460 + } 461 + } 462 + 463 + if (mask & IB_QP_PATH_MTU) { 464 + struct rxe_port *port = &rxe->port; 465 + 466 + enum ib_mtu max_mtu = port->attr.max_mtu; 467 + enum ib_mtu mtu = attr->path_mtu; 468 + 469 + if (mtu > max_mtu) { 470 + pr_debug("invalid mtu (%d) > (%d)\n", 471 + ib_mtu_enum_to_int(mtu), 472 + ib_mtu_enum_to_int(max_mtu)); 473 + goto err1; 474 + } 475 + } 476 + 477 + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { 478 + if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { 479 + pr_warn("invalid max_rd_atomic %d > %d\n", 480 + 
attr->max_rd_atomic, 481 + rxe->attr.max_qp_rd_atom); 482 + goto err1; 483 + } 484 + } 485 + 486 + if (mask & IB_QP_TIMEOUT) { 487 + if (attr->timeout > 31) { 488 + pr_warn("invalid QP timeout %d > 31\n", 489 + attr->timeout); 490 + goto err1; 491 + } 492 + } 493 + 494 + return 0; 495 + 496 + err1: 497 + return -EINVAL; 498 + } 499 + 500 + /* move the qp to the reset state */ 501 + static void rxe_qp_reset(struct rxe_qp *qp) 502 + { 503 + /* stop tasks from running */ 504 + rxe_disable_task(&qp->resp.task); 505 + 506 + /* stop request/comp */ 507 + if (qp->sq.queue) { 508 + if (qp_type(qp) == IB_QPT_RC) 509 + rxe_disable_task(&qp->comp.task); 510 + rxe_disable_task(&qp->req.task); 511 + } 512 + 513 + /* move qp to the reset state */ 514 + qp->req.state = QP_STATE_RESET; 515 + qp->resp.state = QP_STATE_RESET; 516 + 517 + /* let state machines reset themselves drain work and packet queues 518 + * etc. 519 + */ 520 + __rxe_do_task(&qp->resp.task); 521 + 522 + if (qp->sq.queue) { 523 + __rxe_do_task(&qp->comp.task); 524 + __rxe_do_task(&qp->req.task); 525 + } 526 + 527 + /* cleanup attributes */ 528 + atomic_set(&qp->ssn, 0); 529 + qp->req.opcode = -1; 530 + qp->req.need_retry = 0; 531 + qp->req.noack_pkts = 0; 532 + qp->resp.msn = 0; 533 + qp->resp.opcode = -1; 534 + qp->resp.drop_msg = 0; 535 + qp->resp.goto_error = 0; 536 + qp->resp.sent_psn_nak = 0; 537 + 538 + if (qp->resp.mr) { 539 + rxe_drop_ref(qp->resp.mr); 540 + qp->resp.mr = NULL; 541 + } 542 + 543 + cleanup_rd_atomic_resources(qp); 544 + 545 + /* reenable tasks */ 546 + rxe_enable_task(&qp->resp.task); 547 + 548 + if (qp->sq.queue) { 549 + if (qp_type(qp) == IB_QPT_RC) 550 + rxe_enable_task(&qp->comp.task); 551 + 552 + rxe_enable_task(&qp->req.task); 553 + } 554 + } 555 + 556 + /* drain the send queue */ 557 + static void rxe_qp_drain(struct rxe_qp *qp) 558 + { 559 + if (qp->sq.queue) { 560 + if (qp->req.state != QP_STATE_DRAINED) { 561 + qp->req.state = QP_STATE_DRAIN; 562 + if (qp_type(qp) == IB_QPT_RC) 
563 + rxe_run_task(&qp->comp.task, 1); 564 + else 565 + __rxe_do_task(&qp->comp.task); 566 + rxe_run_task(&qp->req.task, 1); 567 + } 568 + } 569 + } 570 + 571 + /* move the qp to the error state */ 572 + void rxe_qp_error(struct rxe_qp *qp) 573 + { 574 + qp->req.state = QP_STATE_ERROR; 575 + qp->resp.state = QP_STATE_ERROR; 576 + 577 + /* drain work and packet queues */ 578 + rxe_run_task(&qp->resp.task, 1); 579 + 580 + if (qp_type(qp) == IB_QPT_RC) 581 + rxe_run_task(&qp->comp.task, 1); 582 + else 583 + __rxe_do_task(&qp->comp.task); 584 + rxe_run_task(&qp->req.task, 1); 585 + } 586 + 587 + /* called by the modify qp verb */ 588 + int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, 589 + struct ib_udata *udata) 590 + { 591 + int err; 592 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 593 + union ib_gid sgid; 594 + struct ib_gid_attr sgid_attr; 595 + 596 + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { 597 + int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); 598 + 599 + free_rd_atomic_resources(qp); 600 + 601 + err = alloc_rd_atomic_resources(qp, max_rd_atomic); 602 + if (err) 603 + return err; 604 + 605 + qp->attr.max_rd_atomic = max_rd_atomic; 606 + atomic_set(&qp->req.rd_atomic, max_rd_atomic); 607 + } 608 + 609 + if (mask & IB_QP_CUR_STATE) 610 + qp->attr.cur_qp_state = attr->qp_state; 611 + 612 + if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) 613 + qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; 614 + 615 + if (mask & IB_QP_ACCESS_FLAGS) 616 + qp->attr.qp_access_flags = attr->qp_access_flags; 617 + 618 + if (mask & IB_QP_PKEY_INDEX) 619 + qp->attr.pkey_index = attr->pkey_index; 620 + 621 + if (mask & IB_QP_PORT) 622 + qp->attr.port_num = attr->port_num; 623 + 624 + if (mask & IB_QP_QKEY) 625 + qp->attr.qkey = attr->qkey; 626 + 627 + if (mask & IB_QP_AV) { 628 + ib_get_cached_gid(&rxe->ib_dev, 1, 629 + attr->ah_attr.grh.sgid_index, &sgid, 630 + &sgid_attr); 631 + rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av, 632 + 
&attr->ah_attr); 633 + rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr, 634 + &sgid_attr, &sgid); 635 + if (sgid_attr.ndev) 636 + dev_put(sgid_attr.ndev); 637 + } 638 + 639 + if (mask & IB_QP_ALT_PATH) { 640 + ib_get_cached_gid(&rxe->ib_dev, 1, 641 + attr->alt_ah_attr.grh.sgid_index, &sgid, 642 + &sgid_attr); 643 + 644 + rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av, 645 + &attr->alt_ah_attr); 646 + rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr, 647 + &sgid_attr, &sgid); 648 + if (sgid_attr.ndev) 649 + dev_put(sgid_attr.ndev); 650 + 651 + qp->attr.alt_port_num = attr->alt_port_num; 652 + qp->attr.alt_pkey_index = attr->alt_pkey_index; 653 + qp->attr.alt_timeout = attr->alt_timeout; 654 + } 655 + 656 + if (mask & IB_QP_PATH_MTU) { 657 + qp->attr.path_mtu = attr->path_mtu; 658 + qp->mtu = ib_mtu_enum_to_int(attr->path_mtu); 659 + } 660 + 661 + if (mask & IB_QP_TIMEOUT) { 662 + qp->attr.timeout = attr->timeout; 663 + if (attr->timeout == 0) { 664 + qp->qp_timeout_jiffies = 0; 665 + } else { 666 + /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */ 667 + int j = nsecs_to_jiffies(4096ULL << attr->timeout); 668 + 669 + qp->qp_timeout_jiffies = j ? 
j : 1; 670 + } 671 + } 672 + 673 + if (mask & IB_QP_RETRY_CNT) { 674 + qp->attr.retry_cnt = attr->retry_cnt; 675 + qp->comp.retry_cnt = attr->retry_cnt; 676 + pr_debug("set retry count = %d\n", attr->retry_cnt); 677 + } 678 + 679 + if (mask & IB_QP_RNR_RETRY) { 680 + qp->attr.rnr_retry = attr->rnr_retry; 681 + qp->comp.rnr_retry = attr->rnr_retry; 682 + pr_debug("set rnr retry count = %d\n", attr->rnr_retry); 683 + } 684 + 685 + if (mask & IB_QP_RQ_PSN) { 686 + qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); 687 + qp->resp.psn = qp->attr.rq_psn; 688 + pr_debug("set resp psn = 0x%x\n", qp->resp.psn); 689 + } 690 + 691 + if (mask & IB_QP_MIN_RNR_TIMER) { 692 + qp->attr.min_rnr_timer = attr->min_rnr_timer; 693 + pr_debug("set min rnr timer = 0x%x\n", 694 + attr->min_rnr_timer); 695 + } 696 + 697 + if (mask & IB_QP_SQ_PSN) { 698 + qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); 699 + qp->req.psn = qp->attr.sq_psn; 700 + qp->comp.psn = qp->attr.sq_psn; 701 + pr_debug("set req psn = 0x%x\n", qp->req.psn); 702 + } 703 + 704 + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { 705 + qp->attr.max_dest_rd_atomic = 706 + __roundup_pow_of_two(attr->max_dest_rd_atomic); 707 + } 708 + 709 + if (mask & IB_QP_PATH_MIG_STATE) 710 + qp->attr.path_mig_state = attr->path_mig_state; 711 + 712 + if (mask & IB_QP_DEST_QPN) 713 + qp->attr.dest_qp_num = attr->dest_qp_num; 714 + 715 + if (mask & IB_QP_STATE) { 716 + qp->attr.qp_state = attr->qp_state; 717 + 718 + switch (attr->qp_state) { 719 + case IB_QPS_RESET: 720 + pr_debug("qp state -> RESET\n"); 721 + rxe_qp_reset(qp); 722 + break; 723 + 724 + case IB_QPS_INIT: 725 + pr_debug("qp state -> INIT\n"); 726 + qp->req.state = QP_STATE_INIT; 727 + qp->resp.state = QP_STATE_INIT; 728 + break; 729 + 730 + case IB_QPS_RTR: 731 + pr_debug("qp state -> RTR\n"); 732 + qp->resp.state = QP_STATE_READY; 733 + break; 734 + 735 + case IB_QPS_RTS: 736 + pr_debug("qp state -> RTS\n"); 737 + qp->req.state = QP_STATE_READY; 738 + break; 739 + 740 + case IB_QPS_SQD: 
741 + pr_debug("qp state -> SQD\n"); 742 + rxe_qp_drain(qp); 743 + break; 744 + 745 + case IB_QPS_SQE: 746 + pr_warn("qp state -> SQE !!?\n"); 747 + /* Not possible from modify_qp. */ 748 + break; 749 + 750 + case IB_QPS_ERR: 751 + pr_debug("qp state -> ERR\n"); 752 + rxe_qp_error(qp); 753 + break; 754 + } 755 + } 756 + 757 + return 0; 758 + } 759 + 760 + /* called by the query qp verb */ 761 + int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) 762 + { 763 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 764 + 765 + *attr = qp->attr; 766 + 767 + attr->rq_psn = qp->resp.psn; 768 + attr->sq_psn = qp->req.psn; 769 + 770 + attr->cap.max_send_wr = qp->sq.max_wr; 771 + attr->cap.max_send_sge = qp->sq.max_sge; 772 + attr->cap.max_inline_data = qp->sq.max_inline; 773 + 774 + if (!qp->srq) { 775 + attr->cap.max_recv_wr = qp->rq.max_wr; 776 + attr->cap.max_recv_sge = qp->rq.max_sge; 777 + } 778 + 779 + rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr); 780 + rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr); 781 + 782 + if (qp->req.state == QP_STATE_DRAIN) { 783 + attr->sq_draining = 1; 784 + /* applications that get this state 785 + * typically spin on it. 
yield the 786 + * processor 787 + */ 788 + cond_resched(); 789 + } else { 790 + attr->sq_draining = 0; 791 + } 792 + 793 + pr_debug("attr->sq_draining = %d\n", attr->sq_draining); 794 + 795 + return 0; 796 + } 797 + 798 + /* called by the destroy qp verb */ 799 + void rxe_qp_destroy(struct rxe_qp *qp) 800 + { 801 + qp->valid = 0; 802 + qp->qp_timeout_jiffies = 0; 803 + rxe_cleanup_task(&qp->resp.task); 804 + 805 + del_timer_sync(&qp->retrans_timer); 806 + del_timer_sync(&qp->rnr_nak_timer); 807 + 808 + rxe_cleanup_task(&qp->req.task); 809 + if (qp_type(qp) == IB_QPT_RC) 810 + rxe_cleanup_task(&qp->comp.task); 811 + 812 + /* flush out any receive wr's or pending requests */ 813 + __rxe_do_task(&qp->req.task); 814 + if (qp->sq.queue) { 815 + __rxe_do_task(&qp->comp.task); 816 + __rxe_do_task(&qp->req.task); 817 + } 818 + } 819 + 820 + /* called when the last reference to the qp is dropped */ 821 + void rxe_qp_cleanup(void *arg) 822 + { 823 + struct rxe_qp *qp = arg; 824 + 825 + rxe_drop_all_mcast_groups(qp); 826 + 827 + if (qp->sq.queue) 828 + rxe_queue_cleanup(qp->sq.queue); 829 + 830 + if (qp->srq) 831 + rxe_drop_ref(qp->srq); 832 + 833 + if (qp->rq.queue) 834 + rxe_queue_cleanup(qp->rq.queue); 835 + 836 + if (qp->scq) 837 + rxe_drop_ref(qp->scq); 838 + if (qp->rcq) 839 + rxe_drop_ref(qp->rcq); 840 + if (qp->pd) 841 + rxe_drop_ref(qp->pd); 842 + 843 + if (qp->resp.mr) { 844 + rxe_drop_ref(qp->resp.mr); 845 + qp->resp.mr = NULL; 846 + } 847 + 848 + free_rd_atomic_resources(qp); 849 + 850 + kernel_sock_shutdown(qp->sk, SHUT_RDWR); 851 + }
+217
drivers/infiniband/sw/rxe/rxe_queue.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* circular work/completion queues shared with user space via mmap */

#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

/* for a user-space queue, publish the queue's mmap cookie to the caller's
 * udata response and register it on the device's pending-mmaps list; for a
 * kernel queue (no udata) just returns 0 with *ip_p = NULL.
 *
 * is_req selects the send-queue layout, where the response buffer begins
 * with a struct mminfo that must be skipped.
 */
int do_mmap_info(struct rxe_dev *rxe,
		 struct ib_udata *udata,
		 bool is_req,
		 struct ib_ucontext *context,
		 struct rxe_queue_buf *buf,
		 size_t buf_size,
		 struct rxe_mmap_info **ip_p)
{
	int err;
	u32 len, offset;
	struct rxe_mmap_info *ip = NULL;

	if (udata) {
		if (is_req) {
			/* NOTE(review): if udata->outlen < sizeof(struct
			 * mminfo) this subtraction underflows u32 and the
			 * length check below passes -- presumably callers
			 * guarantee a large enough outlen; verify.
			 */
			len = udata->outlen - sizeof(struct mminfo);
			offset = sizeof(struct mminfo);
		} else {
			len = udata->outlen;
			offset = 0;
		}

		if (len < sizeof(ip->info))
			goto err1;

		ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
		if (!ip)
			goto err1;

		err = copy_to_user(udata->outbuf + offset, &ip->info,
				   sizeof(ip->info));
		if (err)
			goto err2;

		spin_lock_bh(&rxe->pending_lock);
		list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
		spin_unlock_bh(&rxe->pending_lock);
	}

	*ip_p = ip;

	return 0;

err2:
	kfree(ip);
err1:
	return -EINVAL;
}

/* allocate a queue with room for at least *num_elem elements of
 * elem_size bytes each; the slot count is rounded up to a power of two
 * (one slot is reserved to distinguish full from empty) and *num_elem is
 * updated to the usable capacity. Returns NULL on failure.
 */
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
				 int *num_elem,
				 unsigned int elem_size)
{
	struct rxe_queue *q;
	size_t buf_size;
	unsigned int num_slots;

	/* num_elem == 0 is allowed, but uninteresting */
	if (*num_elem < 0)
		goto err1;

	q = kmalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		goto err1;

	q->rxe = rxe;

	/* used in resize, only need to copy used part of queue */
	q->elem_size = elem_size;

	/* pad element up to at least a cacheline and always a power of 2 */
	if (elem_size < cache_line_size())
		elem_size = cache_line_size();
	elem_size = roundup_pow_of_two(elem_size);

	q->log2_elem_size = order_base_2(elem_size);

	num_slots = *num_elem + 1;
	num_slots = roundup_pow_of_two(num_slots);
	q->index_mask = num_slots - 1;

	buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;

	/* vmalloc_user so the buffer can later be mmapped to user space */
	q->buf = vmalloc_user(buf_size);
	if (!q->buf)
		goto err2;

	q->buf->log2_elem_size = q->log2_elem_size;
	q->buf->index_mask = q->index_mask;

	q->buf_size = buf_size;

	*num_elem = num_slots - 1;
	return q;

err2:
	kfree(q);
err1:
	return NULL;
}

/* copies elements from original q to new q and then swaps the contents of the
 * two q headers. This is so that if anyone is holding a pointer to q it will
 * still work
 */
static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
			 unsigned int num_elem)
{
	if (!queue_empty(q) && (num_elem < queue_count(q)))
		return -EINVAL;

	while (!queue_empty(q)) {
		memcpy(producer_addr(new_q), consumer_addr(q),
		       new_q->elem_size);
		advance_producer(new_q);
		advance_consumer(q);
	}

	swap(*q, *new_q);

	return 0;
}

/* grow (or shrink) a queue in place while it may be in use.
 *
 * A new queue is built, the live contents are copied across under the
 * producer/consumer locks, and the headers are swapped so existing
 * pointers to q stay valid; after the swap new_q holds the old buffer
 * and is cleaned up regardless of the outcome.
 */
int rxe_queue_resize(struct rxe_queue *q,
		     unsigned int *num_elem_p,
		     unsigned int elem_size,
		     struct ib_ucontext *context,
		     struct ib_udata *udata,
		     spinlock_t *producer_lock,
		     spinlock_t *consumer_lock)
{
	struct rxe_queue *new_q;
	unsigned int num_elem = *num_elem_p;
	int err;
	unsigned long flags = 0, flags1;

	new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);
	if (!new_q)
		return -ENOMEM;

	err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf,
			   new_q->buf_size, &new_q->ip);
	if (err) {
		vfree(new_q->buf);
		kfree(new_q);
		goto err1;
	}

	spin_lock_irqsave(consumer_lock, flags1);

	/* a queue with only a consumer (e.g. a CQ) passes a NULL
	 * producer_lock
	 */
	if (producer_lock) {
		spin_lock_irqsave(producer_lock, flags);
		err = resize_finish(q, new_q, num_elem);
		spin_unlock_irqrestore(producer_lock, flags);
	} else {
		err = resize_finish(q, new_q, num_elem);
	}

	spin_unlock_irqrestore(consumer_lock, flags1);

	rxe_queue_cleanup(new_q);	/* new/old dep on err */
	if (err)
		goto err1;

	*num_elem_p = num_elem;
	return 0;

err1:
	return err;
}

/* free a queue; if it was published to user space the buffer is owned by
 * the mmap info and freed when its last reference drops, otherwise free
 * it directly
 */
void rxe_queue_cleanup(struct rxe_queue *q)
{
	if (q->ip)
		kref_put(&q->ip->ref, rxe_mmap_release);
	else
		vfree(q->buf);

	kfree(q);
}
+178
drivers/infiniband/sw/rxe/rxe_queue.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + */ 33 + 34 + #ifndef RXE_QUEUE_H 35 + #define RXE_QUEUE_H 36 + 37 + /* implements a simple circular buffer that can optionally be 38 + * shared between user space and the kernel and can be resized 39 + 40 + * the requested element size is rounded up to a power of 2 41 + * and the number of elements in the buffer is also rounded 42 + * up to a power of 2. 
Since the queue is empty when the 43 + * producer and consumer indices match the maximum capacity 44 + * of the queue is one less than the number of element slots 45 + */ 46 + 47 + /* this data structure is shared between user space and kernel 48 + * space for those cases where the queue is shared. It contains 49 + * the producer and consumer indices. Is also contains a copy 50 + * of the queue size parameters for user space to use but the 51 + * kernel must use the parameters in the rxe_queue struct 52 + * this MUST MATCH the corresponding librxe struct 53 + * for performance reasons arrange to have producer and consumer 54 + * pointers in separate cache lines 55 + * the kernel should always mask the indices to avoid accessing 56 + * memory outside of the data area 57 + */ 58 + struct rxe_queue_buf { 59 + __u32 log2_elem_size; 60 + __u32 index_mask; 61 + __u32 pad_1[30]; 62 + __u32 producer_index; 63 + __u32 pad_2[31]; 64 + __u32 consumer_index; 65 + __u32 pad_3[31]; 66 + __u8 data[0]; 67 + }; 68 + 69 + struct rxe_queue { 70 + struct rxe_dev *rxe; 71 + struct rxe_queue_buf *buf; 72 + struct rxe_mmap_info *ip; 73 + size_t buf_size; 74 + size_t elem_size; 75 + unsigned int log2_elem_size; 76 + unsigned int index_mask; 77 + }; 78 + 79 + int do_mmap_info(struct rxe_dev *rxe, 80 + struct ib_udata *udata, 81 + bool is_req, 82 + struct ib_ucontext *context, 83 + struct rxe_queue_buf *buf, 84 + size_t buf_size, 85 + struct rxe_mmap_info **ip_p); 86 + 87 + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, 88 + int *num_elem, 89 + unsigned int elem_size); 90 + 91 + int rxe_queue_resize(struct rxe_queue *q, 92 + unsigned int *num_elem_p, 93 + unsigned int elem_size, 94 + struct ib_ucontext *context, 95 + struct ib_udata *udata, 96 + /* Protect producers while resizing queue */ 97 + spinlock_t *producer_lock, 98 + /* Protect consumers while resizing queue */ 99 + spinlock_t *consumer_lock); 100 + 101 + void rxe_queue_cleanup(struct rxe_queue *queue); 102 + 103 + static 
inline int next_index(struct rxe_queue *q, int index) 104 + { 105 + return (index + 1) & q->buf->index_mask; 106 + } 107 + 108 + static inline int queue_empty(struct rxe_queue *q) 109 + { 110 + return ((q->buf->producer_index - q->buf->consumer_index) 111 + & q->index_mask) == 0; 112 + } 113 + 114 + static inline int queue_full(struct rxe_queue *q) 115 + { 116 + return ((q->buf->producer_index + 1 - q->buf->consumer_index) 117 + & q->index_mask) == 0; 118 + } 119 + 120 + static inline void advance_producer(struct rxe_queue *q) 121 + { 122 + q->buf->producer_index = (q->buf->producer_index + 1) 123 + & q->index_mask; 124 + } 125 + 126 + static inline void advance_consumer(struct rxe_queue *q) 127 + { 128 + q->buf->consumer_index = (q->buf->consumer_index + 1) 129 + & q->index_mask; 130 + } 131 + 132 + static inline void *producer_addr(struct rxe_queue *q) 133 + { 134 + return q->buf->data + ((q->buf->producer_index & q->index_mask) 135 + << q->log2_elem_size); 136 + } 137 + 138 + static inline void *consumer_addr(struct rxe_queue *q) 139 + { 140 + return q->buf->data + ((q->buf->consumer_index & q->index_mask) 141 + << q->log2_elem_size); 142 + } 143 + 144 + static inline unsigned int producer_index(struct rxe_queue *q) 145 + { 146 + return q->buf->producer_index; 147 + } 148 + 149 + static inline unsigned int consumer_index(struct rxe_queue *q) 150 + { 151 + return q->buf->consumer_index; 152 + } 153 + 154 + static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) 155 + { 156 + return q->buf->data + ((index & q->index_mask) 157 + << q->buf->log2_elem_size); 158 + } 159 + 160 + static inline unsigned int index_from_addr(const struct rxe_queue *q, 161 + const void *addr) 162 + { 163 + return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) 164 + & q->index_mask; 165 + } 166 + 167 + static inline unsigned int queue_count(const struct rxe_queue *q) 168 + { 169 + return (q->buf->producer_index - q->buf->consumer_index) 170 + & q->index_mask; 171 + 
} 172 + 173 + static inline void *queue_head(struct rxe_queue *q) 174 + { 175 + return queue_empty(q) ? NULL : consumer_addr(q); 176 + } 177 + 178 + #endif /* RXE_QUEUE_H */
+420
drivers/infiniband/sw/rxe/rxe_recv.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/skbuff.h> 35 + 36 + #include "rxe.h" 37 + #include "rxe_loc.h" 38 + 39 + static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 40 + struct rxe_qp *qp) 41 + { 42 + if (unlikely(!qp->valid)) 43 + goto err1; 44 + 45 + switch (qp_type(qp)) { 46 + case IB_QPT_RC: 47 + if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { 48 + pr_warn_ratelimited("bad qp type\n"); 49 + goto err1; 50 + } 51 + break; 52 + case IB_QPT_UC: 53 + if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { 54 + pr_warn_ratelimited("bad qp type\n"); 55 + goto err1; 56 + } 57 + break; 58 + case IB_QPT_UD: 59 + case IB_QPT_SMI: 60 + case IB_QPT_GSI: 61 + if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { 62 + pr_warn_ratelimited("bad qp type\n"); 63 + goto err1; 64 + } 65 + break; 66 + default: 67 + pr_warn_ratelimited("unsupported qp type\n"); 68 + goto err1; 69 + } 70 + 71 + if (pkt->mask & RXE_REQ_MASK) { 72 + if (unlikely(qp->resp.state != QP_STATE_READY)) 73 + goto err1; 74 + } else if (unlikely(qp->req.state < QP_STATE_READY || 75 + qp->req.state > QP_STATE_DRAINED)) { 76 + goto err1; 77 + } 78 + 79 + return 0; 80 + 81 + err1: 82 + return -EINVAL; 83 + } 84 + 85 + static void set_bad_pkey_cntr(struct rxe_port *port) 86 + { 87 + spin_lock_bh(&port->port_lock); 88 + port->attr.bad_pkey_cntr = min((u32)0xffff, 89 + port->attr.bad_pkey_cntr + 1); 90 + spin_unlock_bh(&port->port_lock); 91 + } 92 + 93 + static void set_qkey_viol_cntr(struct rxe_port *port) 94 + { 95 + spin_lock_bh(&port->port_lock); 96 + port->attr.qkey_viol_cntr = min((u32)0xffff, 97 + port->attr.qkey_viol_cntr + 1); 98 + spin_unlock_bh(&port->port_lock); 99 + } 100 + 101 + static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 102 + u32 qpn, struct rxe_qp *qp) 103 + { 104 + int i; 105 + int found_pkey = 0; 106 + struct rxe_port *port = &rxe->port; 107 + u16 pkey = bth_pkey(pkt); 108 + 109 + pkt->pkey_index = 0; 110 + 111 + if (qpn == 1) { 112 + for (i = 0; i < 
port->attr.pkey_tbl_len; i++) { 113 + if (pkey_match(pkey, port->pkey_tbl[i])) { 114 + pkt->pkey_index = i; 115 + found_pkey = 1; 116 + break; 117 + } 118 + } 119 + 120 + if (!found_pkey) { 121 + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); 122 + set_bad_pkey_cntr(port); 123 + goto err1; 124 + } 125 + } else if (qpn != 0) { 126 + if (unlikely(!pkey_match(pkey, 127 + port->pkey_tbl[qp->attr.pkey_index] 128 + ))) { 129 + pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); 130 + set_bad_pkey_cntr(port); 131 + goto err1; 132 + } 133 + pkt->pkey_index = qp->attr.pkey_index; 134 + } 135 + 136 + if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && 137 + qpn != 0 && pkt->mask) { 138 + u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; 139 + 140 + if (unlikely(deth_qkey(pkt) != qkey)) { 141 + pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", 142 + deth_qkey(pkt), qkey, qpn); 143 + set_qkey_viol_cntr(port); 144 + goto err1; 145 + } 146 + } 147 + 148 + return 0; 149 + 150 + err1: 151 + return -EINVAL; 152 + } 153 + 154 + static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 155 + struct rxe_qp *qp) 156 + { 157 + struct sk_buff *skb = PKT_TO_SKB(pkt); 158 + 159 + if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) 160 + goto done; 161 + 162 + if (unlikely(pkt->port_num != qp->attr.port_num)) { 163 + pr_warn_ratelimited("port %d != qp port %d\n", 164 + pkt->port_num, qp->attr.port_num); 165 + goto err1; 166 + } 167 + 168 + if (skb->protocol == htons(ETH_P_IP)) { 169 + struct in_addr *saddr = 170 + &qp->pri_av.sgid_addr._sockaddr_in.sin_addr; 171 + struct in_addr *daddr = 172 + &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; 173 + 174 + if (ip_hdr(skb)->daddr != saddr->s_addr) { 175 + pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", 176 + &ip_hdr(skb)->daddr, 177 + &saddr->s_addr); 178 + goto err1; 179 + } 180 + 181 + if (ip_hdr(skb)->saddr != daddr->s_addr) { 182 + pr_warn_ratelimited("source addr %pI4 != qp dst addr 
%pI4\n", 183 + &ip_hdr(skb)->saddr, 184 + &daddr->s_addr); 185 + goto err1; 186 + } 187 + 188 + } else if (skb->protocol == htons(ETH_P_IPV6)) { 189 + struct in6_addr *saddr = 190 + &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr; 191 + struct in6_addr *daddr = 192 + &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; 193 + 194 + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { 195 + pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", 196 + &ipv6_hdr(skb)->daddr, saddr); 197 + goto err1; 198 + } 199 + 200 + if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { 201 + pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", 202 + &ipv6_hdr(skb)->saddr, daddr); 203 + goto err1; 204 + } 205 + } 206 + 207 + done: 208 + return 0; 209 + 210 + err1: 211 + return -EINVAL; 212 + } 213 + 214 + static int hdr_check(struct rxe_pkt_info *pkt) 215 + { 216 + struct rxe_dev *rxe = pkt->rxe; 217 + struct rxe_port *port = &rxe->port; 218 + struct rxe_qp *qp = NULL; 219 + u32 qpn = bth_qpn(pkt); 220 + int index; 221 + int err; 222 + 223 + if (unlikely(bth_tver(pkt) != BTH_TVER)) { 224 + pr_warn_ratelimited("bad tver\n"); 225 + goto err1; 226 + } 227 + 228 + if (qpn != IB_MULTICAST_QPN) { 229 + index = (qpn == 0) ? port->qp_smi_index : 230 + ((qpn == 1) ? 
port->qp_gsi_index : qpn); 231 + qp = rxe_pool_get_index(&rxe->qp_pool, index); 232 + if (unlikely(!qp)) { 233 + pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); 234 + goto err1; 235 + } 236 + 237 + err = check_type_state(rxe, pkt, qp); 238 + if (unlikely(err)) 239 + goto err2; 240 + 241 + err = check_addr(rxe, pkt, qp); 242 + if (unlikely(err)) 243 + goto err2; 244 + 245 + err = check_keys(rxe, pkt, qpn, qp); 246 + if (unlikely(err)) 247 + goto err2; 248 + } else { 249 + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { 250 + pr_warn_ratelimited("no grh for mcast qpn\n"); 251 + goto err1; 252 + } 253 + } 254 + 255 + pkt->qp = qp; 256 + return 0; 257 + 258 + err2: 259 + if (qp) 260 + rxe_drop_ref(qp); 261 + err1: 262 + return -EINVAL; 263 + } 264 + 265 + static inline void rxe_rcv_pkt(struct rxe_dev *rxe, 266 + struct rxe_pkt_info *pkt, 267 + struct sk_buff *skb) 268 + { 269 + if (pkt->mask & RXE_REQ_MASK) 270 + rxe_resp_queue_pkt(rxe, pkt->qp, skb); 271 + else 272 + rxe_comp_queue_pkt(rxe, pkt->qp, skb); 273 + } 274 + 275 + static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) 276 + { 277 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 278 + struct rxe_mc_grp *mcg; 279 + struct sk_buff *skb_copy; 280 + struct rxe_mc_elem *mce; 281 + struct rxe_qp *qp; 282 + union ib_gid dgid; 283 + int err; 284 + 285 + if (skb->protocol == htons(ETH_P_IP)) 286 + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, 287 + (struct in6_addr *)&dgid); 288 + else if (skb->protocol == htons(ETH_P_IPV6)) 289 + memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); 290 + 291 + /* lookup mcast group corresponding to mgid, takes a ref */ 292 + mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); 293 + if (!mcg) 294 + goto err1; /* mcast group not registered */ 295 + 296 + spin_lock_bh(&mcg->mcg_lock); 297 + 298 + list_for_each_entry(mce, &mcg->qp_list, qp_list) { 299 + qp = mce->qp; 300 + pkt = SKB_TO_PKT(skb); 301 + 302 + /* validate qp for incoming packet */ 303 + err = 
check_type_state(rxe, pkt, qp); 304 + if (err) 305 + continue; 306 + 307 + err = check_keys(rxe, pkt, bth_qpn(pkt), qp); 308 + if (err) 309 + continue; 310 + 311 + /* if *not* the last qp in the list 312 + * make a copy of the skb to post to the next qp 313 + */ 314 + skb_copy = (mce->qp_list.next != &mcg->qp_list) ? 315 + skb_clone(skb, GFP_KERNEL) : NULL; 316 + 317 + pkt->qp = qp; 318 + rxe_add_ref(qp); 319 + rxe_rcv_pkt(rxe, pkt, skb); 320 + 321 + skb = skb_copy; 322 + if (!skb) 323 + break; 324 + } 325 + 326 + spin_unlock_bh(&mcg->mcg_lock); 327 + 328 + rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ 329 + 330 + err1: 331 + if (skb) 332 + kfree_skb(skb); 333 + } 334 + 335 + static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) 336 + { 337 + union ib_gid dgid; 338 + union ib_gid *pdgid; 339 + u16 index; 340 + 341 + if (skb->protocol == htons(ETH_P_IP)) { 342 + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, 343 + (struct in6_addr *)&dgid); 344 + pdgid = &dgid; 345 + } else { 346 + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; 347 + } 348 + 349 + return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, 350 + IB_GID_TYPE_ROCE_UDP_ENCAP, 351 + 1, rxe->ndev, &index); 352 + } 353 + 354 + /* rxe_rcv is called from the interface driver */ 355 + int rxe_rcv(struct sk_buff *skb) 356 + { 357 + int err; 358 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 359 + struct rxe_dev *rxe = pkt->rxe; 360 + __be32 *icrcp; 361 + u32 calc_icrc, pack_icrc; 362 + 363 + pkt->offset = 0; 364 + 365 + if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) 366 + goto drop; 367 + 368 + if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { 369 + pr_warn_ratelimited("failed matching dgid\n"); 370 + goto drop; 371 + } 372 + 373 + pkt->opcode = bth_opcode(pkt); 374 + pkt->psn = bth_psn(pkt); 375 + pkt->qp = NULL; 376 + pkt->mask |= rxe_opcode[pkt->opcode].mask; 377 + 378 + if (unlikely(skb->len < header_size(pkt))) 379 + goto drop; 380 + 381 + err = hdr_check(pkt); 382 + if (unlikely(err)) 
383 + goto drop; 384 + 385 + /* Verify ICRC */ 386 + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); 387 + pack_icrc = be32_to_cpu(*icrcp); 388 + 389 + calc_icrc = rxe_icrc_hdr(pkt, skb); 390 + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); 391 + calc_icrc = cpu_to_be32(~calc_icrc); 392 + if (unlikely(calc_icrc != pack_icrc)) { 393 + char saddr[sizeof(struct in6_addr)]; 394 + 395 + if (skb->protocol == htons(ETH_P_IPV6)) 396 + sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); 397 + else if (skb->protocol == htons(ETH_P_IP)) 398 + sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); 399 + else 400 + sprintf(saddr, "unknown"); 401 + 402 + pr_warn_ratelimited("bad ICRC from %s\n", saddr); 403 + goto drop; 404 + } 405 + 406 + if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN)) 407 + rxe_rcv_mcast_pkt(rxe, skb); 408 + else 409 + rxe_rcv_pkt(rxe, pkt, skb); 410 + 411 + return 0; 412 + 413 + drop: 414 + if (pkt->qp) 415 + rxe_drop_ref(pkt->qp); 416 + 417 + kfree_skb(skb); 418 + return 0; 419 + } 420 + EXPORT_SYMBOL(rxe_rcv);
+726
drivers/infiniband/sw/rxe/rxe_req.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

/* forward declaration; used by retry_first_write_send() below */
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode);

/* Replay the DMA/opcode state of a partially acked write or send WQE
 * for the first npsn packets so retransmission resumes at the correct
 * payload offset and opcode position (FIRST/MIDDLE/LAST).
 */
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned mask, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		/* each packet carries at most one MTU of payload */
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
		/* RDMA writes also advance the remote virtual address */
		if (mask & WR_WRITE_MASK)
			wqe->iova += qp->mtu;
	}
}

/* Rewind the requester to the last acked PSN (qp->comp.psn) and reset
 * every not-yet-completed WQE on the send queue back to "posted" so it
 * will be retransmitted.  The first unacked WQE may be partially acked
 * and needs its DMA state replayed rather than fully reset.
 */
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;		/* packets of the first WQE already acked */
	int first = 1;

	wqe = queue_head(qp->sq.queue);
	npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;

	qp->req.wqe_index	= consumer_index(qp->sq.queue);
	qp->req.psn		= qp->comp.psn;
	qp->req.opcode		= -1;

	for (wqe_index = consumer_index(qp->sq.queue);
		wqe_index != producer_index(qp->sq.queue);
		wqe_index = next_index(qp->sq.queue, wqe_index)) {
		wqe = addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		/* restore the remote address the WQE started from */
		wqe->iova = (mask & WR_ATOMIC_MASK) ?
				wqe->wr.wr.atomic.remote_addr :
				(mask & WR_READ_OR_WRITE_MASK) ?
					wqe->wr.wr.rdma.remote_addr :
					0;

		/* fully reset DMA state except for the partially acked
		 * first READ WQE, whose response data already landed
		 */
		if (!first || (mask & WR_READ_MASK) == 0) {
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			if (mask & WR_WRITE_OR_SEND_MASK)
				retry_first_write_send(qp, wqe, mask, npsn);

			/* reads resume at the first unacked response PSN */
			if (mask & WR_READ_MASK)
				wqe->iova += npsn * qp->mtu;
		}

		wqe->state = wqe_state_posted;
	}
}

/* RNR NAK timer callback: the remote asked us to back off; kick the
 * requester task to resend once the timer expires
 */
void rnr_nak_timer(unsigned long data)
{
	struct rxe_qp *qp = (struct rxe_qp *)data;

	pr_debug("rnr nak timer fired\n");
	rxe_run_task(&qp->req.task, 1);
}

/* Return the next WQE to process, or NULL if there is nothing to do
 * (queue empty, QP draining/drained, or blocked on a fence).  Also
 * handles the SQ-drain transition and raises IB_EVENT_SQ_DRAINED.
 */
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
	unsigned long flags;

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		spin_lock_irqsave(&qp->state_lock, flags);
		do {
			if (qp->req.state != QP_STATE_DRAIN) {
				/* comp just finished */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			if (wqe && ((qp->req.wqe_index !=
				consumer_index(qp->sq.queue)) ||
				(wqe->state != wqe_state_posted))) {
				/* comp not done yet */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} while (0);
	}

	if (qp->req.wqe_index == producer_index(qp->sq.queue))
		return NULL;

	wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);

	/* while draining, only WQEs already in flight may proceed */
	if (unlikely((qp->req.state == QP_STATE_DRAIN ||
		      qp->req.state == QP_STATE_DRAINED) &&
		     (wqe->state != wqe_state_processing)))
		return NULL;

	/* a fenced WQE must wait until all prior WQEs have completed */
	if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
		     (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
		qp->req.wait_fence = 1;
		return NULL;
	}

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

/* Choose the next RC wire opcode for a work request, continuing a
 * multi-packet FIRST/MIDDLE sequence when the previous opcode started
 * one; "fits" means the remaining payload fits in a single packet.
 */
static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		/* local operations: no wire opcode, echo back the WR code */
		return opcode;
	}

	return -EINVAL;
}

/* Choose the next UC wire opcode; same FIRST/MIDDLE/LAST logic as RC
 * but UC supports only sends and RDMA writes.
 */
static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

/* Dispatch to the per-transport opcode chooser.  UD-class QPs only
 * support single-packet sends, so no FIRST/MIDDLE/LAST handling.
 * Returns the wire opcode or -EINVAL for an unsupported combination.
 */
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       unsigned opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_SMI:
	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

/* Reserve an outstanding read/atomic credit for this WQE.  Returns 0
 * if a credit was taken (or already held), -EAGAIN if the initiator
 * depth is exhausted; need_rd_atomic tells the completer to rerun us.
 */
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	/* went negative: undo the reservation and back off */
	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

/* Per-packet MTU: the QP's negotiated MTU for connected transports,
 * the port's capability otherwise.
 * NOTE(review): 'av' is assigned but never used below — looks like
 * leftover code; confirm before removing.
 */
static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_port *port;
	struct rxe_av *av;

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	av = &wqe->av;
	port = &rxe->port;

	return port->mtu_cap;
}

/* Allocate an skb for one request packet and fill in the BTH plus any
 * extension headers (RETH/IMMDT/IETH/ATMETH/DETH) the opcode requires.
 * Returns the skb, or NULL on allocation failure.
 */
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_send_wqe *wqe,
				       int opcode, int payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev		*rxe = to_rdev(qp->ibqp.device);
	struct rxe_port		*port = &rxe->port;
	struct sk_buff		*skb;
	struct rxe_send_wr	*ibwr = &wqe->wr;
	struct rxe_av		*av;
	int			pad = (-payload) & 0x3;	/* pad payload to 4 bytes */
	int			paylen;
	int			solicited;
	u16			pkey;
	u32			qp_num;
	int			ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	/* pkt->hdr, rxe, port_num and mask are initialized in ifc
	 * layer
	 */
	pkt->opcode	= opcode;
	pkt->qp		= qp;
	pkt->psn	= qp->req.psn;
	pkt->mask	= rxe_opcode[opcode].mask;
	pkt->paylen	= paylen;
	pkt->offset	= 0;
	pkt->wqe	= wqe;

	/* init skb */
	av = rxe_get_av(pkt);
	skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth: solicited only on the final packet of a send, or of
	 * a write with immediate (the only writes that consume an RWQE)
	 */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	pkey = (qp_type(qp) == IB_QPT_GSI) ?
		 port->pkey_tbl[ibwr->wr.ud.pkey_index] :
		 port->pkey_tbl[qp->attr.pkey_index];

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	/* request an ack on the last packet or every RXE_MAX_PKT_PER_ACK
	 * packets, to bound the unacked window
	 */
	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
		 ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.length);
	}

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
		    opcode == IB_OPCODE_RD_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

/* Prepare the packet for transmit and copy/crc the payload into it.
 * (Definition continues beyond this excerpt.)
 */
static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
		       int paylen)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u32 crc = 0;
	u32 *p;
	int err;

	err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			crc = crc32_le(crc, tmp, paylen);

			memcpy(payload_addr(pkt), tmp, paylen);
494 + 495 + wqe->dma.resid -= paylen; 496 + wqe->dma.sge_offset += paylen; 497 + } else { 498 + err = copy_data(rxe, qp->pd, 0, &wqe->dma, 499 + payload_addr(pkt), paylen, 500 + from_mem_obj, 501 + &crc); 502 + if (err) 503 + return err; 504 + } 505 + } 506 + p = payload_addr(pkt) + paylen + bth_pad(pkt); 507 + 508 + *p = ~crc; 509 + 510 + return 0; 511 + } 512 + 513 + static void update_wqe_state(struct rxe_qp *qp, 514 + struct rxe_send_wqe *wqe, 515 + struct rxe_pkt_info *pkt, 516 + enum wqe_state *prev_state) 517 + { 518 + enum wqe_state prev_state_ = wqe->state; 519 + 520 + if (pkt->mask & RXE_END_MASK) { 521 + if (qp_type(qp) == IB_QPT_RC) 522 + wqe->state = wqe_state_pending; 523 + } else { 524 + wqe->state = wqe_state_processing; 525 + } 526 + 527 + *prev_state = prev_state_; 528 + } 529 + 530 + static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 531 + struct rxe_pkt_info *pkt, int payload) 532 + { 533 + /* number of packets left to send including current one */ 534 + int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; 535 + 536 + /* handle zero length packet case */ 537 + if (num_pkt == 0) 538 + num_pkt = 1; 539 + 540 + if (pkt->mask & RXE_START_MASK) { 541 + wqe->first_psn = qp->req.psn; 542 + wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK; 543 + } 544 + 545 + if (pkt->mask & RXE_READ_MASK) 546 + qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; 547 + else 548 + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; 549 + 550 + qp->req.opcode = pkt->opcode; 551 + 552 + 553 + if (pkt->mask & RXE_END_MASK) 554 + qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); 555 + 556 + qp->need_req_skb = 0; 557 + 558 + if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer)) 559 + mod_timer(&qp->retrans_timer, 560 + jiffies + qp->qp_timeout_jiffies); 561 + } 562 + 563 + int rxe_requester(void *arg) 564 + { 565 + struct rxe_qp *qp = (struct rxe_qp *)arg; 566 + struct rxe_pkt_info pkt; 567 + struct sk_buff 
*skb; 568 + struct rxe_send_wqe *wqe; 569 + unsigned mask; 570 + int payload; 571 + int mtu; 572 + int opcode; 573 + int ret; 574 + enum wqe_state prev_state; 575 + 576 + next_wqe: 577 + if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) 578 + goto exit; 579 + 580 + if (unlikely(qp->req.state == QP_STATE_RESET)) { 581 + qp->req.wqe_index = consumer_index(qp->sq.queue); 582 + qp->req.opcode = -1; 583 + qp->req.need_rd_atomic = 0; 584 + qp->req.wait_psn = 0; 585 + qp->req.need_retry = 0; 586 + goto exit; 587 + } 588 + 589 + if (unlikely(qp->req.need_retry)) { 590 + req_retry(qp); 591 + qp->req.need_retry = 0; 592 + } 593 + 594 + wqe = req_next_wqe(qp); 595 + if (unlikely(!wqe)) 596 + goto exit; 597 + 598 + if (wqe->mask & WR_REG_MASK) { 599 + if (wqe->wr.opcode == IB_WR_LOCAL_INV) { 600 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 601 + struct rxe_mem *rmr; 602 + 603 + rmr = rxe_pool_get_index(&rxe->mr_pool, 604 + wqe->wr.ex.invalidate_rkey >> 8); 605 + if (!rmr) { 606 + pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); 607 + wqe->state = wqe_state_error; 608 + wqe->status = IB_WC_MW_BIND_ERR; 609 + goto exit; 610 + } 611 + rmr->state = RXE_MEM_STATE_FREE; 612 + wqe->state = wqe_state_done; 613 + wqe->status = IB_WC_SUCCESS; 614 + } else if (wqe->wr.opcode == IB_WR_REG_MR) { 615 + struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr); 616 + 617 + rmr->state = RXE_MEM_STATE_VALID; 618 + rmr->access = wqe->wr.wr.reg.access; 619 + rmr->lkey = wqe->wr.wr.reg.key; 620 + rmr->rkey = wqe->wr.wr.reg.key; 621 + wqe->state = wqe_state_done; 622 + wqe->status = IB_WC_SUCCESS; 623 + } else { 624 + goto exit; 625 + } 626 + qp->req.wqe_index = next_index(qp->sq.queue, 627 + qp->req.wqe_index); 628 + goto next_wqe; 629 + } 630 + 631 + if (unlikely(qp_type(qp) == IB_QPT_RC && 632 + qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { 633 + qp->req.wait_psn = 1; 634 + goto exit; 635 + } 636 + 637 + /* Limit the number of inflight SKBs per QP */ 638 + if 
(unlikely(atomic_read(&qp->skb_out) > 639 + RXE_INFLIGHT_SKBS_PER_QP_HIGH)) { 640 + qp->need_req_skb = 1; 641 + goto exit; 642 + } 643 + 644 + opcode = next_opcode(qp, wqe, wqe->wr.opcode); 645 + if (unlikely(opcode < 0)) { 646 + wqe->status = IB_WC_LOC_QP_OP_ERR; 647 + goto exit; 648 + } 649 + 650 + mask = rxe_opcode[opcode].mask; 651 + if (unlikely(mask & RXE_READ_OR_ATOMIC)) { 652 + if (check_init_depth(qp, wqe)) 653 + goto exit; 654 + } 655 + 656 + mtu = get_mtu(qp, wqe); 657 + payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; 658 + if (payload > mtu) { 659 + if (qp_type(qp) == IB_QPT_UD) { 660 + /* C10-93.1.1: If the total sum of all the buffer lengths specified for a 661 + * UD message exceeds the MTU of the port as returned by QueryHCA, the CI 662 + * shall not emit any packets for this message. Further, the CI shall not 663 + * generate an error due to this condition. 664 + */ 665 + 666 + /* fake a successful UD send */ 667 + wqe->first_psn = qp->req.psn; 668 + wqe->last_psn = qp->req.psn; 669 + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; 670 + qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; 671 + qp->req.wqe_index = next_index(qp->sq.queue, 672 + qp->req.wqe_index); 673 + wqe->state = wqe_state_done; 674 + wqe->status = IB_WC_SUCCESS; 675 + goto complete; 676 + } 677 + payload = mtu; 678 + } 679 + 680 + skb = init_req_packet(qp, wqe, opcode, payload, &pkt); 681 + if (unlikely(!skb)) { 682 + pr_err("Failed allocating skb\n"); 683 + goto err; 684 + } 685 + 686 + if (fill_packet(qp, wqe, &pkt, skb, payload)) { 687 + pr_debug("Error during fill packet\n"); 688 + goto err; 689 + } 690 + 691 + update_wqe_state(qp, wqe, &pkt, &prev_state); 692 + ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); 693 + if (ret) { 694 + qp->need_req_skb = 1; 695 + kfree_skb(skb); 696 + 697 + wqe->state = prev_state; 698 + 699 + if (ret == -EAGAIN) { 700 + rxe_run_task(&qp->req.task, 1); 701 + goto exit; 702 + } 703 + 704 + goto err; 705 + } 706 + 707 + 
update_state(qp, wqe, &pkt, payload); 708 + 709 + goto next_wqe; 710 + 711 + err: 712 + kfree_skb(skb); 713 + wqe->status = IB_WC_LOC_PROT_ERR; 714 + wqe->state = wqe_state_error; 715 + 716 + complete: 717 + if (qp_type(qp) != IB_QPT_RC) { 718 + while (rxe_completer(qp) == 0) 719 + ; 720 + } 721 + 722 + return 0; 723 + 724 + exit: 725 + return -EAGAIN; 726 + }
+1380
drivers/infiniband/sw/rxe/rxe_resp.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/skbuff.h> 35 + 36 + #include "rxe.h" 37 + #include "rxe_loc.h" 38 + #include "rxe_queue.h" 39 + 40 + enum resp_states { 41 + RESPST_NONE, 42 + RESPST_GET_REQ, 43 + RESPST_CHK_PSN, 44 + RESPST_CHK_OP_SEQ, 45 + RESPST_CHK_OP_VALID, 46 + RESPST_CHK_RESOURCE, 47 + RESPST_CHK_LENGTH, 48 + RESPST_CHK_RKEY, 49 + RESPST_EXECUTE, 50 + RESPST_READ_REPLY, 51 + RESPST_COMPLETE, 52 + RESPST_ACKNOWLEDGE, 53 + RESPST_CLEANUP, 54 + RESPST_DUPLICATE_REQUEST, 55 + RESPST_ERR_MALFORMED_WQE, 56 + RESPST_ERR_UNSUPPORTED_OPCODE, 57 + RESPST_ERR_MISALIGNED_ATOMIC, 58 + RESPST_ERR_PSN_OUT_OF_SEQ, 59 + RESPST_ERR_MISSING_OPCODE_FIRST, 60 + RESPST_ERR_MISSING_OPCODE_LAST_C, 61 + RESPST_ERR_MISSING_OPCODE_LAST_D1E, 62 + RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, 63 + RESPST_ERR_RNR, 64 + RESPST_ERR_RKEY_VIOLATION, 65 + RESPST_ERR_LENGTH, 66 + RESPST_ERR_CQ_OVERFLOW, 67 + RESPST_ERROR, 68 + RESPST_RESET, 69 + RESPST_DONE, 70 + RESPST_EXIT, 71 + }; 72 + 73 + static char *resp_state_name[] = { 74 + [RESPST_NONE] = "NONE", 75 + [RESPST_GET_REQ] = "GET_REQ", 76 + [RESPST_CHK_PSN] = "CHK_PSN", 77 + [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ", 78 + [RESPST_CHK_OP_VALID] = "CHK_OP_VALID", 79 + [RESPST_CHK_RESOURCE] = "CHK_RESOURCE", 80 + [RESPST_CHK_LENGTH] = "CHK_LENGTH", 81 + [RESPST_CHK_RKEY] = "CHK_RKEY", 82 + [RESPST_EXECUTE] = "EXECUTE", 83 + [RESPST_READ_REPLY] = "READ_REPLY", 84 + [RESPST_COMPLETE] = "COMPLETE", 85 + [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", 86 + [RESPST_CLEANUP] = "CLEANUP", 87 + [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST", 88 + [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE", 89 + [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE", 90 + [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC", 91 + [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ", 92 + [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST", 93 + [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C", 94 + [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = 
"ERR_MISSING_OPCODE_LAST_D1E", 95 + [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", 96 + [RESPST_ERR_RNR] = "ERR_RNR", 97 + [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", 98 + [RESPST_ERR_LENGTH] = "ERR_LENGTH", 99 + [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", 100 + [RESPST_ERROR] = "ERROR", 101 + [RESPST_RESET] = "RESET", 102 + [RESPST_DONE] = "DONE", 103 + [RESPST_EXIT] = "EXIT", 104 + }; 105 + 106 + /* rxe_recv calls here to add a request packet to the input queue */ 107 + void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, 108 + struct sk_buff *skb) 109 + { 110 + int must_sched; 111 + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 112 + 113 + skb_queue_tail(&qp->req_pkts, skb); 114 + 115 + must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || 116 + (skb_queue_len(&qp->req_pkts) > 1); 117 + 118 + rxe_run_task(&qp->resp.task, must_sched); 119 + } 120 + 121 + static inline enum resp_states get_req(struct rxe_qp *qp, 122 + struct rxe_pkt_info **pkt_p) 123 + { 124 + struct sk_buff *skb; 125 + 126 + if (qp->resp.state == QP_STATE_ERROR) { 127 + skb = skb_dequeue(&qp->req_pkts); 128 + if (skb) { 129 + /* drain request packet queue */ 130 + rxe_drop_ref(qp); 131 + kfree_skb(skb); 132 + return RESPST_GET_REQ; 133 + } 134 + 135 + /* go drain recv wr queue */ 136 + return RESPST_CHK_RESOURCE; 137 + } 138 + 139 + skb = skb_peek(&qp->req_pkts); 140 + if (!skb) 141 + return RESPST_EXIT; 142 + 143 + *pkt_p = SKB_TO_PKT(skb); 144 + 145 + return (qp->resp.res) ? 
RESPST_READ_REPLY : RESPST_CHK_PSN; 146 + } 147 + 148 + static enum resp_states check_psn(struct rxe_qp *qp, 149 + struct rxe_pkt_info *pkt) 150 + { 151 + int diff = psn_compare(pkt->psn, qp->resp.psn); 152 + 153 + switch (qp_type(qp)) { 154 + case IB_QPT_RC: 155 + if (diff > 0) { 156 + if (qp->resp.sent_psn_nak) 157 + return RESPST_CLEANUP; 158 + 159 + qp->resp.sent_psn_nak = 1; 160 + return RESPST_ERR_PSN_OUT_OF_SEQ; 161 + 162 + } else if (diff < 0) { 163 + return RESPST_DUPLICATE_REQUEST; 164 + } 165 + 166 + if (qp->resp.sent_psn_nak) 167 + qp->resp.sent_psn_nak = 0; 168 + 169 + break; 170 + 171 + case IB_QPT_UC: 172 + if (qp->resp.drop_msg || diff != 0) { 173 + if (pkt->mask & RXE_START_MASK) { 174 + qp->resp.drop_msg = 0; 175 + return RESPST_CHK_OP_SEQ; 176 + } 177 + 178 + qp->resp.drop_msg = 1; 179 + return RESPST_CLEANUP; 180 + } 181 + break; 182 + default: 183 + break; 184 + } 185 + 186 + return RESPST_CHK_OP_SEQ; 187 + } 188 + 189 + static enum resp_states check_op_seq(struct rxe_qp *qp, 190 + struct rxe_pkt_info *pkt) 191 + { 192 + switch (qp_type(qp)) { 193 + case IB_QPT_RC: 194 + switch (qp->resp.opcode) { 195 + case IB_OPCODE_RC_SEND_FIRST: 196 + case IB_OPCODE_RC_SEND_MIDDLE: 197 + switch (pkt->opcode) { 198 + case IB_OPCODE_RC_SEND_MIDDLE: 199 + case IB_OPCODE_RC_SEND_LAST: 200 + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: 201 + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: 202 + return RESPST_CHK_OP_VALID; 203 + default: 204 + return RESPST_ERR_MISSING_OPCODE_LAST_C; 205 + } 206 + 207 + case IB_OPCODE_RC_RDMA_WRITE_FIRST: 208 + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 209 + switch (pkt->opcode) { 210 + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 211 + case IB_OPCODE_RC_RDMA_WRITE_LAST: 212 + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 213 + return RESPST_CHK_OP_VALID; 214 + default: 215 + return RESPST_ERR_MISSING_OPCODE_LAST_C; 216 + } 217 + 218 + default: 219 + switch (pkt->opcode) { 220 + case IB_OPCODE_RC_SEND_MIDDLE: 221 + case 
IB_OPCODE_RC_SEND_LAST: 222 + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: 223 + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: 224 + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 225 + case IB_OPCODE_RC_RDMA_WRITE_LAST: 226 + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 227 + return RESPST_ERR_MISSING_OPCODE_FIRST; 228 + default: 229 + return RESPST_CHK_OP_VALID; 230 + } 231 + } 232 + break; 233 + 234 + case IB_QPT_UC: 235 + switch (qp->resp.opcode) { 236 + case IB_OPCODE_UC_SEND_FIRST: 237 + case IB_OPCODE_UC_SEND_MIDDLE: 238 + switch (pkt->opcode) { 239 + case IB_OPCODE_UC_SEND_MIDDLE: 240 + case IB_OPCODE_UC_SEND_LAST: 241 + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: 242 + return RESPST_CHK_OP_VALID; 243 + default: 244 + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; 245 + } 246 + 247 + case IB_OPCODE_UC_RDMA_WRITE_FIRST: 248 + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 249 + switch (pkt->opcode) { 250 + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 251 + case IB_OPCODE_UC_RDMA_WRITE_LAST: 252 + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 253 + return RESPST_CHK_OP_VALID; 254 + default: 255 + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; 256 + } 257 + 258 + default: 259 + switch (pkt->opcode) { 260 + case IB_OPCODE_UC_SEND_MIDDLE: 261 + case IB_OPCODE_UC_SEND_LAST: 262 + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: 263 + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 264 + case IB_OPCODE_UC_RDMA_WRITE_LAST: 265 + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 266 + qp->resp.drop_msg = 1; 267 + return RESPST_CLEANUP; 268 + default: 269 + return RESPST_CHK_OP_VALID; 270 + } 271 + } 272 + break; 273 + 274 + default: 275 + return RESPST_CHK_OP_VALID; 276 + } 277 + } 278 + 279 + static enum resp_states check_op_valid(struct rxe_qp *qp, 280 + struct rxe_pkt_info *pkt) 281 + { 282 + switch (qp_type(qp)) { 283 + case IB_QPT_RC: 284 + if (((pkt->mask & RXE_READ_MASK) && 285 + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || 286 + ((pkt->mask & RXE_WRITE_MASK) && 287 + 
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || 288 + ((pkt->mask & RXE_ATOMIC_MASK) && 289 + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { 290 + return RESPST_ERR_UNSUPPORTED_OPCODE; 291 + } 292 + 293 + break; 294 + 295 + case IB_QPT_UC: 296 + if ((pkt->mask & RXE_WRITE_MASK) && 297 + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { 298 + qp->resp.drop_msg = 1; 299 + return RESPST_CLEANUP; 300 + } 301 + 302 + break; 303 + 304 + case IB_QPT_UD: 305 + case IB_QPT_SMI: 306 + case IB_QPT_GSI: 307 + break; 308 + 309 + default: 310 + WARN_ON(1); 311 + break; 312 + } 313 + 314 + return RESPST_CHK_RESOURCE; 315 + } 316 + 317 + static enum resp_states get_srq_wqe(struct rxe_qp *qp) 318 + { 319 + struct rxe_srq *srq = qp->srq; 320 + struct rxe_queue *q = srq->rq.queue; 321 + struct rxe_recv_wqe *wqe; 322 + struct ib_event ev; 323 + 324 + if (srq->error) 325 + return RESPST_ERR_RNR; 326 + 327 + spin_lock_bh(&srq->rq.consumer_lock); 328 + 329 + wqe = queue_head(q); 330 + if (!wqe) { 331 + spin_unlock_bh(&srq->rq.consumer_lock); 332 + return RESPST_ERR_RNR; 333 + } 334 + 335 + /* note kernel and user space recv wqes have same size */ 336 + memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe)); 337 + 338 + qp->resp.wqe = &qp->resp.srq_wqe.wqe; 339 + advance_consumer(q); 340 + 341 + if (srq->limit && srq->ibsrq.event_handler && 342 + (queue_count(q) < srq->limit)) { 343 + srq->limit = 0; 344 + goto event; 345 + } 346 + 347 + spin_unlock_bh(&srq->rq.consumer_lock); 348 + return RESPST_CHK_LENGTH; 349 + 350 + event: 351 + spin_unlock_bh(&srq->rq.consumer_lock); 352 + ev.device = qp->ibqp.device; 353 + ev.element.srq = qp->ibqp.srq; 354 + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; 355 + srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context); 356 + return RESPST_CHK_LENGTH; 357 + } 358 + 359 + static enum resp_states check_resource(struct rxe_qp *qp, 360 + struct rxe_pkt_info *pkt) 361 + { 362 + struct rxe_srq *srq = qp->srq; 363 + 364 + if (qp->resp.state 
== QP_STATE_ERROR) { 365 + if (qp->resp.wqe) { 366 + qp->resp.status = IB_WC_WR_FLUSH_ERR; 367 + return RESPST_COMPLETE; 368 + } else if (!srq) { 369 + qp->resp.wqe = queue_head(qp->rq.queue); 370 + if (qp->resp.wqe) { 371 + qp->resp.status = IB_WC_WR_FLUSH_ERR; 372 + return RESPST_COMPLETE; 373 + } else { 374 + return RESPST_EXIT; 375 + } 376 + } else { 377 + return RESPST_EXIT; 378 + } 379 + } 380 + 381 + if (pkt->mask & RXE_READ_OR_ATOMIC) { 382 + /* it is the requesters job to not send 383 + * too many read/atomic ops, we just 384 + * recycle the responder resource queue 385 + */ 386 + if (likely(qp->attr.max_rd_atomic > 0)) 387 + return RESPST_CHK_LENGTH; 388 + else 389 + return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; 390 + } 391 + 392 + if (pkt->mask & RXE_RWR_MASK) { 393 + if (srq) 394 + return get_srq_wqe(qp); 395 + 396 + qp->resp.wqe = queue_head(qp->rq.queue); 397 + return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; 398 + } 399 + 400 + return RESPST_CHK_LENGTH; 401 + } 402 + 403 + static enum resp_states check_length(struct rxe_qp *qp, 404 + struct rxe_pkt_info *pkt) 405 + { 406 + switch (qp_type(qp)) { 407 + case IB_QPT_RC: 408 + return RESPST_CHK_RKEY; 409 + 410 + case IB_QPT_UC: 411 + return RESPST_CHK_RKEY; 412 + 413 + default: 414 + return RESPST_CHK_RKEY; 415 + } 416 + } 417 + 418 + static enum resp_states check_rkey(struct rxe_qp *qp, 419 + struct rxe_pkt_info *pkt) 420 + { 421 + struct rxe_mem *mem; 422 + u64 va; 423 + u32 rkey; 424 + u32 resid; 425 + u32 pktlen; 426 + int mtu = qp->mtu; 427 + enum resp_states state; 428 + int access; 429 + 430 + if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { 431 + if (pkt->mask & RXE_RETH_MASK) { 432 + qp->resp.va = reth_va(pkt); 433 + qp->resp.rkey = reth_rkey(pkt); 434 + qp->resp.resid = reth_len(pkt); 435 + } 436 + access = (pkt->mask & RXE_READ_MASK) ? 
IB_ACCESS_REMOTE_READ 437 + : IB_ACCESS_REMOTE_WRITE; 438 + } else if (pkt->mask & RXE_ATOMIC_MASK) { 439 + qp->resp.va = atmeth_va(pkt); 440 + qp->resp.rkey = atmeth_rkey(pkt); 441 + qp->resp.resid = sizeof(u64); 442 + access = IB_ACCESS_REMOTE_ATOMIC; 443 + } else { 444 + return RESPST_EXECUTE; 445 + } 446 + 447 + va = qp->resp.va; 448 + rkey = qp->resp.rkey; 449 + resid = qp->resp.resid; 450 + pktlen = payload_size(pkt); 451 + 452 + mem = lookup_mem(qp->pd, access, rkey, lookup_remote); 453 + if (!mem) { 454 + state = RESPST_ERR_RKEY_VIOLATION; 455 + goto err1; 456 + } 457 + 458 + if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { 459 + state = RESPST_ERR_RKEY_VIOLATION; 460 + goto err1; 461 + } 462 + 463 + if (mem_check_range(mem, va, resid)) { 464 + state = RESPST_ERR_RKEY_VIOLATION; 465 + goto err2; 466 + } 467 + 468 + if (pkt->mask & RXE_WRITE_MASK) { 469 + if (resid > mtu) { 470 + if (pktlen != mtu || bth_pad(pkt)) { 471 + state = RESPST_ERR_LENGTH; 472 + goto err2; 473 + } 474 + 475 + resid = mtu; 476 + } else { 477 + if (pktlen != resid) { 478 + state = RESPST_ERR_LENGTH; 479 + goto err2; 480 + } 481 + if ((bth_pad(pkt) != (0x3 & (-resid)))) { 482 + /* This case may not be exactly that 483 + * but nothing else fits. 484 + */ 485 + state = RESPST_ERR_LENGTH; 486 + goto err2; 487 + } 488 + } 489 + } 490 + 491 + WARN_ON(qp->resp.mr); 492 + 493 + qp->resp.mr = mem; 494 + return RESPST_EXECUTE; 495 + 496 + err2: 497 + rxe_drop_ref(mem); 498 + err1: 499 + return state; 500 + } 501 + 502 + static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, 503 + int data_len) 504 + { 505 + int err; 506 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 507 + 508 + err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, 509 + data_addr, data_len, to_mem_obj, NULL); 510 + if (unlikely(err)) 511 + return (err == -ENOSPC) ? 
RESPST_ERR_LENGTH 512 + : RESPST_ERR_MALFORMED_WQE; 513 + 514 + return RESPST_NONE; 515 + } 516 + 517 + static enum resp_states write_data_in(struct rxe_qp *qp, 518 + struct rxe_pkt_info *pkt) 519 + { 520 + enum resp_states rc = RESPST_NONE; 521 + int err; 522 + int data_len = payload_size(pkt); 523 + 524 + err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), 525 + data_len, to_mem_obj, NULL); 526 + if (err) { 527 + rc = RESPST_ERR_RKEY_VIOLATION; 528 + goto out; 529 + } 530 + 531 + qp->resp.va += data_len; 532 + qp->resp.resid -= data_len; 533 + 534 + out: 535 + return rc; 536 + } 537 + 538 + /* Guarantee atomicity of atomic operations at the machine level. */ 539 + static DEFINE_SPINLOCK(atomic_ops_lock); 540 + 541 + static enum resp_states process_atomic(struct rxe_qp *qp, 542 + struct rxe_pkt_info *pkt) 543 + { 544 + u64 iova = atmeth_va(pkt); 545 + u64 *vaddr; 546 + enum resp_states ret; 547 + struct rxe_mem *mr = qp->resp.mr; 548 + 549 + if (mr->state != RXE_MEM_STATE_VALID) { 550 + ret = RESPST_ERR_RKEY_VIOLATION; 551 + goto out; 552 + } 553 + 554 + vaddr = iova_to_vaddr(mr, iova, sizeof(u64)); 555 + 556 + /* check vaddr is 8 bytes aligned. 
*/ 557 + if (!vaddr || (uintptr_t)vaddr & 7) { 558 + ret = RESPST_ERR_MISALIGNED_ATOMIC; 559 + goto out; 560 + } 561 + 562 + spin_lock_bh(&atomic_ops_lock); 563 + 564 + qp->resp.atomic_orig = *vaddr; 565 + 566 + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || 567 + pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { 568 + if (*vaddr == atmeth_comp(pkt)) 569 + *vaddr = atmeth_swap_add(pkt); 570 + } else { 571 + *vaddr += atmeth_swap_add(pkt); 572 + } 573 + 574 + spin_unlock_bh(&atomic_ops_lock); 575 + 576 + ret = RESPST_NONE; 577 + out: 578 + return ret; 579 + } 580 + 581 + static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, 582 + struct rxe_pkt_info *pkt, 583 + struct rxe_pkt_info *ack, 584 + int opcode, 585 + int payload, 586 + u32 psn, 587 + u8 syndrome, 588 + u32 *crcp) 589 + { 590 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 591 + struct sk_buff *skb; 592 + u32 crc = 0; 593 + u32 *p; 594 + int paylen; 595 + int pad; 596 + int err; 597 + 598 + /* 599 + * allocate packet 600 + */ 601 + pad = (-payload) & 0x3; 602 + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; 603 + 604 + skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack); 605 + if (!skb) 606 + return NULL; 607 + 608 + ack->qp = qp; 609 + ack->opcode = opcode; 610 + ack->mask = rxe_opcode[opcode].mask; 611 + ack->offset = pkt->offset; 612 + ack->paylen = paylen; 613 + 614 + /* fill in bth using the request packet headers */ 615 + memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); 616 + 617 + bth_set_opcode(ack, opcode); 618 + bth_set_qpn(ack, qp->attr.dest_qp_num); 619 + bth_set_pad(ack, pad); 620 + bth_set_se(ack, 0); 621 + bth_set_psn(ack, psn); 622 + bth_set_ack(ack, 0); 623 + ack->psn = psn; 624 + 625 + if (ack->mask & RXE_AETH_MASK) { 626 + aeth_set_syn(ack, syndrome); 627 + aeth_set_msn(ack, qp->resp.msn); 628 + } 629 + 630 + if (ack->mask & RXE_ATMACK_MASK) 631 + atmack_set_orig(ack, qp->resp.atomic_orig); 632 + 633 + err = rxe->ifc_ops->prepare(rxe, ack, skb, 
&crc); 634 + if (err) { 635 + kfree_skb(skb); 636 + return NULL; 637 + } 638 + 639 + if (crcp) { 640 + /* CRC computation will be continued by the caller */ 641 + *crcp = crc; 642 + } else { 643 + p = payload_addr(ack) + payload + bth_pad(ack); 644 + *p = ~crc; 645 + } 646 + 647 + return skb; 648 + } 649 + 650 + /* RDMA read response. If res is not NULL, then we have a current RDMA request 651 + * being processed or replayed. 652 + */ 653 + static enum resp_states read_reply(struct rxe_qp *qp, 654 + struct rxe_pkt_info *req_pkt) 655 + { 656 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 657 + struct rxe_pkt_info ack_pkt; 658 + struct sk_buff *skb; 659 + int mtu = qp->mtu; 660 + enum resp_states state; 661 + int payload; 662 + int opcode; 663 + int err; 664 + struct resp_res *res = qp->resp.res; 665 + u32 icrc; 666 + u32 *p; 667 + 668 + if (!res) { 669 + /* This is the first time we process that request. Get a 670 + * resource 671 + */ 672 + res = &qp->resp.resources[qp->resp.res_head]; 673 + 674 + free_rd_atomic_resource(qp, res); 675 + rxe_advance_resp_resource(qp); 676 + 677 + res->type = RXE_READ_MASK; 678 + 679 + res->read.va = qp->resp.va; 680 + res->read.va_org = qp->resp.va; 681 + 682 + res->first_psn = req_pkt->psn; 683 + res->last_psn = req_pkt->psn + 684 + (reth_len(req_pkt) + mtu - 1) / 685 + mtu - 1; 686 + res->cur_psn = req_pkt->psn; 687 + 688 + res->read.resid = qp->resp.resid; 689 + res->read.length = qp->resp.resid; 690 + res->read.rkey = qp->resp.rkey; 691 + 692 + /* note res inherits the reference to mr from qp */ 693 + res->read.mr = qp->resp.mr; 694 + qp->resp.mr = NULL; 695 + 696 + qp->resp.res = res; 697 + res->state = rdatm_res_state_new; 698 + } 699 + 700 + if (res->state == rdatm_res_state_new) { 701 + if (res->read.resid <= mtu) 702 + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; 703 + else 704 + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; 705 + } else { 706 + if (res->read.resid > mtu) 707 + opcode = 
IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; 708 + else 709 + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST; 710 + } 711 + 712 + res->state = rdatm_res_state_next; 713 + 714 + payload = min_t(int, res->read.resid, mtu); 715 + 716 + skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, 717 + res->cur_psn, AETH_ACK_UNLIMITED, &icrc); 718 + if (!skb) 719 + return RESPST_ERR_RNR; 720 + 721 + err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), 722 + payload, from_mem_obj, &icrc); 723 + if (err) 724 + pr_err("Failed copying memory\n"); 725 + 726 + p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); 727 + *p = ~icrc; 728 + 729 + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); 730 + if (err) { 731 + pr_err("Failed sending RDMA reply.\n"); 732 + kfree_skb(skb); 733 + return RESPST_ERR_RNR; 734 + } 735 + 736 + res->read.va += payload; 737 + res->read.resid -= payload; 738 + res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK; 739 + 740 + if (res->read.resid > 0) { 741 + state = RESPST_DONE; 742 + } else { 743 + qp->resp.res = NULL; 744 + qp->resp.opcode = -1; 745 + qp->resp.psn = res->cur_psn; 746 + state = RESPST_CLEANUP; 747 + } 748 + 749 + return state; 750 + } 751 + 752 + /* Executes a new request. A retried request never reach that function (send 753 + * and writes are discarded, and reads and atomics are retried elsewhere. 
754 + */ 755 + static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) 756 + { 757 + enum resp_states err; 758 + 759 + if (pkt->mask & RXE_SEND_MASK) { 760 + if (qp_type(qp) == IB_QPT_UD || 761 + qp_type(qp) == IB_QPT_SMI || 762 + qp_type(qp) == IB_QPT_GSI) { 763 + union rdma_network_hdr hdr; 764 + struct sk_buff *skb = PKT_TO_SKB(pkt); 765 + 766 + memset(&hdr, 0, sizeof(hdr)); 767 + if (skb->protocol == htons(ETH_P_IP)) 768 + memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); 769 + else if (skb->protocol == htons(ETH_P_IPV6)) 770 + memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); 771 + 772 + err = send_data_in(qp, &hdr, sizeof(hdr)); 773 + if (err) 774 + return err; 775 + } 776 + err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); 777 + if (err) 778 + return err; 779 + } else if (pkt->mask & RXE_WRITE_MASK) { 780 + err = write_data_in(qp, pkt); 781 + if (err) 782 + return err; 783 + } else if (pkt->mask & RXE_READ_MASK) { 784 + /* For RDMA Read we can increment the msn now. See C9-148. */ 785 + qp->resp.msn++; 786 + return RESPST_READ_REPLY; 787 + } else if (pkt->mask & RXE_ATOMIC_MASK) { 788 + err = process_atomic(qp, pkt); 789 + if (err) 790 + return err; 791 + } else 792 + /* Unreachable */ 793 + WARN_ON(1); 794 + 795 + /* We successfully processed this new request. 
*/ 796 + qp->resp.msn++; 797 + 798 + /* next expected psn, read handles this separately */ 799 + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 800 + 801 + qp->resp.opcode = pkt->opcode; 802 + qp->resp.status = IB_WC_SUCCESS; 803 + 804 + if (pkt->mask & RXE_COMP_MASK) 805 + return RESPST_COMPLETE; 806 + else if (qp_type(qp) == IB_QPT_RC) 807 + return RESPST_ACKNOWLEDGE; 808 + else 809 + return RESPST_CLEANUP; 810 + } 811 + 812 + static enum resp_states do_complete(struct rxe_qp *qp, 813 + struct rxe_pkt_info *pkt) 814 + { 815 + struct rxe_cqe cqe; 816 + struct ib_wc *wc = &cqe.ibwc; 817 + struct ib_uverbs_wc *uwc = &cqe.uibwc; 818 + struct rxe_recv_wqe *wqe = qp->resp.wqe; 819 + 820 + if (unlikely(!wqe)) 821 + return RESPST_CLEANUP; 822 + 823 + memset(&cqe, 0, sizeof(cqe)); 824 + 825 + wc->wr_id = wqe->wr_id; 826 + wc->status = qp->resp.status; 827 + wc->qp = &qp->ibqp; 828 + 829 + /* fields after status are not required for errors */ 830 + if (wc->status == IB_WC_SUCCESS) { 831 + wc->opcode = (pkt->mask & RXE_IMMDT_MASK && 832 + pkt->mask & RXE_WRITE_MASK) ? 
833 + IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; 834 + wc->vendor_err = 0; 835 + wc->byte_len = wqe->dma.length - wqe->dma.resid; 836 + 837 + /* fields after byte_len are different between kernel and user 838 + * space 839 + */ 840 + if (qp->rcq->is_user) { 841 + uwc->wc_flags = IB_WC_GRH; 842 + 843 + if (pkt->mask & RXE_IMMDT_MASK) { 844 + uwc->wc_flags |= IB_WC_WITH_IMM; 845 + uwc->ex.imm_data = 846 + (__u32 __force)immdt_imm(pkt); 847 + } 848 + 849 + if (pkt->mask & RXE_IETH_MASK) { 850 + uwc->wc_flags |= IB_WC_WITH_INVALIDATE; 851 + uwc->ex.invalidate_rkey = ieth_rkey(pkt); 852 + } 853 + 854 + uwc->qp_num = qp->ibqp.qp_num; 855 + 856 + if (pkt->mask & RXE_DETH_MASK) 857 + uwc->src_qp = deth_sqp(pkt); 858 + 859 + uwc->port_num = qp->attr.port_num; 860 + } else { 861 + struct sk_buff *skb = PKT_TO_SKB(pkt); 862 + 863 + wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE; 864 + if (skb->protocol == htons(ETH_P_IP)) 865 + wc->network_hdr_type = RDMA_NETWORK_IPV4; 866 + else 867 + wc->network_hdr_type = RDMA_NETWORK_IPV6; 868 + 869 + if (pkt->mask & RXE_IMMDT_MASK) { 870 + wc->wc_flags |= IB_WC_WITH_IMM; 871 + wc->ex.imm_data = immdt_imm(pkt); 872 + } 873 + 874 + if (pkt->mask & RXE_IETH_MASK) { 875 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 876 + struct rxe_mem *rmr; 877 + 878 + wc->wc_flags |= IB_WC_WITH_INVALIDATE; 879 + wc->ex.invalidate_rkey = ieth_rkey(pkt); 880 + 881 + rmr = rxe_pool_get_index(&rxe->mr_pool, 882 + wc->ex.invalidate_rkey >> 8); 883 + if (unlikely(!rmr)) { 884 + pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); 885 + return RESPST_ERROR; 886 + } 887 + rmr->state = RXE_MEM_STATE_FREE; 888 + } 889 + 890 + wc->qp = &qp->ibqp; 891 + 892 + if (pkt->mask & RXE_DETH_MASK) 893 + wc->src_qp = deth_sqp(pkt); 894 + 895 + wc->port_num = qp->attr.port_num; 896 + } 897 + } 898 + 899 + /* have copy for srq and reference for !srq */ 900 + if (!qp->srq) 901 + advance_consumer(qp->rq.queue); 902 + 903 + qp->resp.wqe = NULL; 904 + 905 + if 
(rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1)) 906 + return RESPST_ERR_CQ_OVERFLOW; 907 + 908 + if (qp->resp.state == QP_STATE_ERROR) 909 + return RESPST_CHK_RESOURCE; 910 + 911 + if (!pkt) 912 + return RESPST_DONE; 913 + else if (qp_type(qp) == IB_QPT_RC) 914 + return RESPST_ACKNOWLEDGE; 915 + else 916 + return RESPST_CLEANUP; 917 + } 918 + 919 + static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, 920 + u8 syndrome, u32 psn) 921 + { 922 + int err = 0; 923 + struct rxe_pkt_info ack_pkt; 924 + struct sk_buff *skb; 925 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 926 + 927 + skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, 928 + 0, psn, syndrome, NULL); 929 + if (!skb) { 930 + err = -ENOMEM; 931 + goto err1; 932 + } 933 + 934 + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); 935 + if (err) { 936 + pr_err_ratelimited("Failed sending ack\n"); 937 + kfree_skb(skb); 938 + } 939 + 940 + err1: 941 + return err; 942 + } 943 + 944 + static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, 945 + u8 syndrome) 946 + { 947 + int rc = 0; 948 + struct rxe_pkt_info ack_pkt; 949 + struct sk_buff *skb; 950 + struct sk_buff *skb_copy; 951 + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 952 + struct resp_res *res; 953 + 954 + skb = prepare_ack_packet(qp, pkt, &ack_pkt, 955 + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, 956 + syndrome, NULL); 957 + if (!skb) { 958 + rc = -ENOMEM; 959 + goto out; 960 + } 961 + 962 + skb_copy = skb_clone(skb, GFP_ATOMIC); 963 + if (skb_copy) 964 + rxe_add_ref(qp); /* for the new SKB */ 965 + else { 966 + pr_warn("Could not clone atomic response\n"); 967 + rc = -ENOMEM; 968 + goto out; 969 + } 970 + 971 + res = &qp->resp.resources[qp->resp.res_head]; 972 + free_rd_atomic_resource(qp, res); 973 + rxe_advance_resp_resource(qp); 974 + 975 + res->type = RXE_ATOMIC_MASK; 976 + res->atomic.skb = skb; 977 + res->first_psn = qp->resp.psn; 978 + res->last_psn = qp->resp.psn; 979 + res->cur_psn = 
qp->resp.psn; 980 + 981 + rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); 982 + if (rc) { 983 + pr_err_ratelimited("Failed sending ack\n"); 984 + rxe_drop_ref(qp); 985 + kfree_skb(skb_copy); 986 + } 987 + 988 + out: 989 + return rc; 990 + } 991 + 992 + static enum resp_states acknowledge(struct rxe_qp *qp, 993 + struct rxe_pkt_info *pkt) 994 + { 995 + if (qp_type(qp) != IB_QPT_RC) 996 + return RESPST_CLEANUP; 997 + 998 + if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) 999 + send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); 1000 + else if (pkt->mask & RXE_ATOMIC_MASK) 1001 + send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); 1002 + else if (bth_ack(pkt)) 1003 + send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); 1004 + 1005 + return RESPST_CLEANUP; 1006 + } 1007 + 1008 + static enum resp_states cleanup(struct rxe_qp *qp, 1009 + struct rxe_pkt_info *pkt) 1010 + { 1011 + struct sk_buff *skb; 1012 + 1013 + if (pkt) { 1014 + skb = skb_dequeue(&qp->req_pkts); 1015 + rxe_drop_ref(qp); 1016 + kfree_skb(skb); 1017 + } 1018 + 1019 + if (qp->resp.mr) { 1020 + rxe_drop_ref(qp->resp.mr); 1021 + qp->resp.mr = NULL; 1022 + } 1023 + 1024 + return RESPST_DONE; 1025 + } 1026 + 1027 + static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn) 1028 + { 1029 + int i; 1030 + 1031 + for (i = 0; i < qp->attr.max_rd_atomic; i++) { 1032 + struct resp_res *res = &qp->resp.resources[i]; 1033 + 1034 + if (res->type == 0) 1035 + continue; 1036 + 1037 + if (psn_compare(psn, res->first_psn) >= 0 && 1038 + psn_compare(psn, res->last_psn) <= 0) { 1039 + return res; 1040 + } 1041 + } 1042 + 1043 + return NULL; 1044 + } 1045 + 1046 + static enum resp_states duplicate_request(struct rxe_qp *qp, 1047 + struct rxe_pkt_info *pkt) 1048 + { 1049 + enum resp_states rc; 1050 + 1051 + if (pkt->mask & RXE_SEND_MASK || 1052 + pkt->mask & RXE_WRITE_MASK) { 1053 + /* SEND. Ack again and cleanup. C9-105. 
*/ 1054 + if (bth_ack(pkt)) 1055 + send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1); 1056 + rc = RESPST_CLEANUP; 1057 + goto out; 1058 + } else if (pkt->mask & RXE_READ_MASK) { 1059 + struct resp_res *res; 1060 + 1061 + res = find_resource(qp, pkt->psn); 1062 + if (!res) { 1063 + /* Resource not found. Class D error. Drop the 1064 + * request. 1065 + */ 1066 + rc = RESPST_CLEANUP; 1067 + goto out; 1068 + } else { 1069 + /* Ensure this new request is the same as the previous 1070 + * one or a subset of it. 1071 + */ 1072 + u64 iova = reth_va(pkt); 1073 + u32 resid = reth_len(pkt); 1074 + 1075 + if (iova < res->read.va_org || 1076 + resid > res->read.length || 1077 + (iova + resid) > (res->read.va_org + 1078 + res->read.length)) { 1079 + rc = RESPST_CLEANUP; 1080 + goto out; 1081 + } 1082 + 1083 + if (reth_rkey(pkt) != res->read.rkey) { 1084 + rc = RESPST_CLEANUP; 1085 + goto out; 1086 + } 1087 + 1088 + res->cur_psn = pkt->psn; 1089 + res->state = (pkt->psn == res->first_psn) ? 1090 + rdatm_res_state_new : 1091 + rdatm_res_state_replay; 1092 + 1093 + /* Reset the resource, except length. */ 1094 + res->read.va_org = iova; 1095 + res->read.va = iova; 1096 + res->read.resid = resid; 1097 + 1098 + /* Replay the RDMA read reply. */ 1099 + qp->resp.res = res; 1100 + rc = RESPST_READ_REPLY; 1101 + goto out; 1102 + } 1103 + } else { 1104 + struct resp_res *res; 1105 + 1106 + /* Find the operation in our list of responder resources. */ 1107 + res = find_resource(qp, pkt->psn); 1108 + if (res) { 1109 + struct sk_buff *skb_copy; 1110 + 1111 + skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC); 1112 + if (skb_copy) { 1113 + rxe_add_ref(qp); /* for the new SKB */ 1114 + } else { 1115 + pr_warn("Couldn't clone atomic resp\n"); 1116 + rc = RESPST_CLEANUP; 1117 + goto out; 1118 + } 1119 + bth_set_psn(SKB_TO_PKT(skb_copy), 1120 + qp->resp.psn - 1); 1121 + /* Resend the result. 
*/ 1122 + rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, 1123 + pkt, skb_copy); 1124 + if (rc) { 1125 + pr_err("Failed resending result. This flow is not handled - skb ignored\n"); 1126 + kfree_skb(skb_copy); 1127 + rc = RESPST_CLEANUP; 1128 + goto out; 1129 + } 1130 + } 1131 + 1132 + /* Resource not found. Class D error. Drop the request. */ 1133 + rc = RESPST_CLEANUP; 1134 + goto out; 1135 + } 1136 + out: 1137 + return rc; 1138 + } 1139 + 1140 + /* Process a class A or C. Both are treated the same in this implementation. */ 1141 + static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome, 1142 + enum ib_wc_status status) 1143 + { 1144 + qp->resp.aeth_syndrome = syndrome; 1145 + qp->resp.status = status; 1146 + 1147 + /* indicate that we should go through the ERROR state */ 1148 + qp->resp.goto_error = 1; 1149 + } 1150 + 1151 + static enum resp_states do_class_d1e_error(struct rxe_qp *qp) 1152 + { 1153 + /* UC */ 1154 + if (qp->srq) { 1155 + /* Class E */ 1156 + qp->resp.drop_msg = 1; 1157 + if (qp->resp.wqe) { 1158 + qp->resp.status = IB_WC_REM_INV_REQ_ERR; 1159 + return RESPST_COMPLETE; 1160 + } else { 1161 + return RESPST_CLEANUP; 1162 + } 1163 + } else { 1164 + /* Class D1. This packet may be the start of a 1165 + * new message and could be valid. The previous 1166 + * message is invalid and ignored. 
reset the 1167 + * recv wr to its original state 1168 + */ 1169 + if (qp->resp.wqe) { 1170 + qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length; 1171 + qp->resp.wqe->dma.cur_sge = 0; 1172 + qp->resp.wqe->dma.sge_offset = 0; 1173 + qp->resp.opcode = -1; 1174 + } 1175 + 1176 + if (qp->resp.mr) { 1177 + rxe_drop_ref(qp->resp.mr); 1178 + qp->resp.mr = NULL; 1179 + } 1180 + 1181 + return RESPST_CLEANUP; 1182 + } 1183 + } 1184 + 1185 + int rxe_responder(void *arg) 1186 + { 1187 + struct rxe_qp *qp = (struct rxe_qp *)arg; 1188 + enum resp_states state; 1189 + struct rxe_pkt_info *pkt = NULL; 1190 + int ret = 0; 1191 + 1192 + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; 1193 + 1194 + if (!qp->valid) { 1195 + ret = -EINVAL; 1196 + goto done; 1197 + } 1198 + 1199 + switch (qp->resp.state) { 1200 + case QP_STATE_RESET: 1201 + state = RESPST_RESET; 1202 + break; 1203 + 1204 + default: 1205 + state = RESPST_GET_REQ; 1206 + break; 1207 + } 1208 + 1209 + while (1) { 1210 + pr_debug("state = %s\n", resp_state_name[state]); 1211 + switch (state) { 1212 + case RESPST_GET_REQ: 1213 + state = get_req(qp, &pkt); 1214 + break; 1215 + case RESPST_CHK_PSN: 1216 + state = check_psn(qp, pkt); 1217 + break; 1218 + case RESPST_CHK_OP_SEQ: 1219 + state = check_op_seq(qp, pkt); 1220 + break; 1221 + case RESPST_CHK_OP_VALID: 1222 + state = check_op_valid(qp, pkt); 1223 + break; 1224 + case RESPST_CHK_RESOURCE: 1225 + state = check_resource(qp, pkt); 1226 + break; 1227 + case RESPST_CHK_LENGTH: 1228 + state = check_length(qp, pkt); 1229 + break; 1230 + case RESPST_CHK_RKEY: 1231 + state = check_rkey(qp, pkt); 1232 + break; 1233 + case RESPST_EXECUTE: 1234 + state = execute(qp, pkt); 1235 + break; 1236 + case RESPST_COMPLETE: 1237 + state = do_complete(qp, pkt); 1238 + break; 1239 + case RESPST_READ_REPLY: 1240 + state = read_reply(qp, pkt); 1241 + break; 1242 + case RESPST_ACKNOWLEDGE: 1243 + state = acknowledge(qp, pkt); 1244 + break; 1245 + case RESPST_CLEANUP: 1246 + state = cleanup(qp, pkt); 
1247 + break; 1248 + case RESPST_DUPLICATE_REQUEST: 1249 + state = duplicate_request(qp, pkt); 1250 + break; 1251 + case RESPST_ERR_PSN_OUT_OF_SEQ: 1252 + /* RC only - Class B. Drop packet. */ 1253 + send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); 1254 + state = RESPST_CLEANUP; 1255 + break; 1256 + 1257 + case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ: 1258 + case RESPST_ERR_MISSING_OPCODE_FIRST: 1259 + case RESPST_ERR_MISSING_OPCODE_LAST_C: 1260 + case RESPST_ERR_UNSUPPORTED_OPCODE: 1261 + case RESPST_ERR_MISALIGNED_ATOMIC: 1262 + /* RC Only - Class C. */ 1263 + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, 1264 + IB_WC_REM_INV_REQ_ERR); 1265 + state = RESPST_COMPLETE; 1266 + break; 1267 + 1268 + case RESPST_ERR_MISSING_OPCODE_LAST_D1E: 1269 + state = do_class_d1e_error(qp); 1270 + break; 1271 + case RESPST_ERR_RNR: 1272 + if (qp_type(qp) == IB_QPT_RC) { 1273 + /* RC - class B */ 1274 + send_ack(qp, pkt, AETH_RNR_NAK | 1275 + (~AETH_TYPE_MASK & 1276 + qp->attr.min_rnr_timer), 1277 + pkt->psn); 1278 + } else { 1279 + /* UD/UC - class D */ 1280 + qp->resp.drop_msg = 1; 1281 + } 1282 + state = RESPST_CLEANUP; 1283 + break; 1284 + 1285 + case RESPST_ERR_RKEY_VIOLATION: 1286 + if (qp_type(qp) == IB_QPT_RC) { 1287 + /* Class C */ 1288 + do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR, 1289 + IB_WC_REM_ACCESS_ERR); 1290 + state = RESPST_COMPLETE; 1291 + } else { 1292 + qp->resp.drop_msg = 1; 1293 + if (qp->srq) { 1294 + /* UC/SRQ Class D */ 1295 + qp->resp.status = IB_WC_REM_ACCESS_ERR; 1296 + state = RESPST_COMPLETE; 1297 + } else { 1298 + /* UC/non-SRQ Class E. 
*/ 1299 + state = RESPST_CLEANUP; 1300 + } 1301 + } 1302 + break; 1303 + 1304 + case RESPST_ERR_LENGTH: 1305 + if (qp_type(qp) == IB_QPT_RC) { 1306 + /* Class C */ 1307 + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, 1308 + IB_WC_REM_INV_REQ_ERR); 1309 + state = RESPST_COMPLETE; 1310 + } else if (qp->srq) { 1311 + /* UC/UD - class E */ 1312 + qp->resp.status = IB_WC_REM_INV_REQ_ERR; 1313 + state = RESPST_COMPLETE; 1314 + } else { 1315 + /* UC/UD - class D */ 1316 + qp->resp.drop_msg = 1; 1317 + state = RESPST_CLEANUP; 1318 + } 1319 + break; 1320 + 1321 + case RESPST_ERR_MALFORMED_WQE: 1322 + /* All, Class A. */ 1323 + do_class_ac_error(qp, AETH_NAK_REM_OP_ERR, 1324 + IB_WC_LOC_QP_OP_ERR); 1325 + state = RESPST_COMPLETE; 1326 + break; 1327 + 1328 + case RESPST_ERR_CQ_OVERFLOW: 1329 + /* All - Class G */ 1330 + state = RESPST_ERROR; 1331 + break; 1332 + 1333 + case RESPST_DONE: 1334 + if (qp->resp.goto_error) { 1335 + state = RESPST_ERROR; 1336 + break; 1337 + } 1338 + 1339 + goto done; 1340 + 1341 + case RESPST_EXIT: 1342 + if (qp->resp.goto_error) { 1343 + state = RESPST_ERROR; 1344 + break; 1345 + } 1346 + 1347 + goto exit; 1348 + 1349 + case RESPST_RESET: { 1350 + struct sk_buff *skb; 1351 + 1352 + while ((skb = skb_dequeue(&qp->req_pkts))) { 1353 + rxe_drop_ref(qp); 1354 + kfree_skb(skb); 1355 + } 1356 + 1357 + while (!qp->srq && qp->rq.queue && 1358 + queue_head(qp->rq.queue)) 1359 + advance_consumer(qp->rq.queue); 1360 + 1361 + qp->resp.wqe = NULL; 1362 + goto exit; 1363 + } 1364 + 1365 + case RESPST_ERROR: 1366 + qp->resp.goto_error = 0; 1367 + pr_warn("qp#%d moved to error state\n", qp_num(qp)); 1368 + rxe_qp_error(qp); 1369 + goto exit; 1370 + 1371 + default: 1372 + WARN_ON(1); 1373 + } 1374 + } 1375 + 1376 + exit: 1377 + ret = -EAGAIN; 1378 + done: 1379 + return ret; 1380 + }
+193
drivers/infiniband/sw/rxe/rxe_srq.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + #include "rxe_queue.h" 37 + 38 + int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 39 + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) 40 + { 41 + if (srq && srq->error) { 42 + pr_warn("srq in error state\n"); 43 + goto err1; 44 + } 45 + 46 + if (mask & IB_SRQ_MAX_WR) { 47 + if (attr->max_wr > rxe->attr.max_srq_wr) { 48 + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", 49 + attr->max_wr, rxe->attr.max_srq_wr); 50 + goto err1; 51 + } 52 + 53 + if (attr->max_wr <= 0) { 54 + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); 55 + goto err1; 56 + } 57 + 58 + if (srq && srq->limit && (attr->max_wr < srq->limit)) { 59 + pr_warn("max_wr (%d) < srq->limit (%d)\n", 60 + attr->max_wr, srq->limit); 61 + goto err1; 62 + } 63 + 64 + if (attr->max_wr < RXE_MIN_SRQ_WR) 65 + attr->max_wr = RXE_MIN_SRQ_WR; 66 + } 67 + 68 + if (mask & IB_SRQ_LIMIT) { 69 + if (attr->srq_limit > rxe->attr.max_srq_wr) { 70 + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", 71 + attr->srq_limit, rxe->attr.max_srq_wr); 72 + goto err1; 73 + } 74 + 75 + if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { 76 + pr_warn("srq_limit (%d) > cur limit(%d)\n", 77 + attr->srq_limit, 78 + srq->rq.queue->buf->index_mask); 79 + goto err1; 80 + } 81 + } 82 + 83 + if (mask == IB_SRQ_INIT_MASK) { 84 + if (attr->max_sge > rxe->attr.max_srq_sge) { 85 + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", 86 + attr->max_sge, rxe->attr.max_srq_sge); 87 + goto err1; 88 + } 89 + 90 + if (attr->max_sge < RXE_MIN_SRQ_SGE) 91 + attr->max_sge = RXE_MIN_SRQ_SGE; 92 + } 93 + 94 + return 0; 95 + 96 + err1: 97 + return -EINVAL; 98 + } 99 + 100 + int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, 101 + struct ib_srq_init_attr *init, 102 + struct ib_ucontext *context, struct ib_udata *udata) 103 + { 104 + int err; 105 + int srq_wqe_size; 106 + struct rxe_queue *q; 107 + 108 + srq->ibsrq.event_handler = init->event_handler; 109 + srq->ibsrq.srq_context = 
init->srq_context; 110 + srq->limit = init->attr.srq_limit; 111 + srq->srq_num = srq->pelem.index; 112 + srq->rq.max_wr = init->attr.max_wr; 113 + srq->rq.max_sge = init->attr.max_sge; 114 + 115 + srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); 116 + 117 + spin_lock_init(&srq->rq.producer_lock); 118 + spin_lock_init(&srq->rq.consumer_lock); 119 + 120 + q = rxe_queue_init(rxe, &srq->rq.max_wr, 121 + srq_wqe_size); 122 + if (!q) { 123 + pr_warn("unable to allocate queue for srq\n"); 124 + return -ENOMEM; 125 + } 126 + 127 + srq->rq.queue = q; 128 + 129 + err = do_mmap_info(rxe, udata, false, context, q->buf, 130 + q->buf_size, &q->ip); 131 + if (err) 132 + return err; 133 + 134 + if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) { 135 + if (copy_to_user(udata->outbuf + sizeof(struct mminfo), 136 + &srq->srq_num, sizeof(u32))) 137 + return -EFAULT; 138 + } 139 + return 0; 140 + } 141 + 142 + int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 143 + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, 144 + struct ib_udata *udata) 145 + { 146 + int err; 147 + struct rxe_queue *q = srq->rq.queue; 148 + struct mminfo mi = { .offset = 1, .size = 0}; 149 + 150 + if (mask & IB_SRQ_MAX_WR) { 151 + /* Check that we can write the mminfo struct to user space */ 152 + if (udata && udata->inlen >= sizeof(__u64)) { 153 + __u64 mi_addr; 154 + 155 + /* Get address of user space mminfo struct */ 156 + err = ib_copy_from_udata(&mi_addr, udata, 157 + sizeof(mi_addr)); 158 + if (err) 159 + goto err1; 160 + 161 + udata->outbuf = (void __user *)(unsigned long)mi_addr; 162 + udata->outlen = sizeof(mi); 163 + 164 + if (!access_ok(VERIFY_WRITE, 165 + (void __user *)udata->outbuf, 166 + udata->outlen)) { 167 + err = -EFAULT; 168 + goto err1; 169 + } 170 + } 171 + 172 + err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, 173 + rcv_wqe_size(srq->rq.max_sge), 174 + srq->rq.queue->ip ? 
175 + srq->rq.queue->ip->context : 176 + NULL, 177 + udata, &srq->rq.producer_lock, 178 + &srq->rq.consumer_lock); 179 + if (err) 180 + goto err2; 181 + } 182 + 183 + if (mask & IB_SRQ_LIMIT) 184 + srq->limit = attr->srq_limit; 185 + 186 + return 0; 187 + 188 + err2: 189 + rxe_queue_cleanup(q); 190 + srq->rq.queue = NULL; 191 + err1: 192 + return err; 193 + }
+157
drivers/infiniband/sw/rxe/rxe_sysfs.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_net.h" 36 + 37 + /* Copy argument and remove trailing CR. Return the new length. */ 38 + static int sanitize_arg(const char *val, char *intf, int intf_len) 39 + { 40 + int len; 41 + 42 + if (!val) 43 + return 0; 44 + 45 + /* Remove newline. 
*/ 46 + for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++) 47 + intf[len] = val[len]; 48 + intf[len] = 0; 49 + 50 + if (len == 0 || (val[len] != 0 && val[len] != '\n')) 51 + return 0; 52 + 53 + return len; 54 + } 55 + 56 + static void rxe_set_port_state(struct net_device *ndev) 57 + { 58 + struct rxe_dev *rxe = net_to_rxe(ndev); 59 + bool is_up = netif_running(ndev) && netif_carrier_ok(ndev); 60 + 61 + if (!rxe) 62 + goto out; 63 + 64 + if (is_up) 65 + rxe_port_up(rxe); 66 + else 67 + rxe_port_down(rxe); /* down for unknown state */ 68 + out: 69 + return; 70 + } 71 + 72 + static int rxe_param_set_add(const char *val, const struct kernel_param *kp) 73 + { 74 + int len; 75 + int err = 0; 76 + char intf[32]; 77 + struct net_device *ndev = NULL; 78 + struct rxe_dev *rxe; 79 + 80 + len = sanitize_arg(val, intf, sizeof(intf)); 81 + if (!len) { 82 + pr_err("rxe: add: invalid interface name\n"); 83 + err = -EINVAL; 84 + goto err; 85 + } 86 + 87 + ndev = dev_get_by_name(&init_net, intf); 88 + if (!ndev) { 89 + pr_err("interface %s not found\n", intf); 90 + err = -EINVAL; 91 + goto err; 92 + } 93 + 94 + if (net_to_rxe(ndev)) { 95 + pr_err("rxe: already configured on %s\n", intf); 96 + err = -EINVAL; 97 + goto err; 98 + } 99 + 100 + rxe = rxe_net_add(ndev); 101 + if (!rxe) { 102 + pr_err("rxe: failed to add %s\n", intf); 103 + err = -EINVAL; 104 + goto err; 105 + } 106 + 107 + rxe_set_port_state(ndev); 108 + pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); 109 + err: 110 + if (ndev) 111 + dev_put(ndev); 112 + return err; 113 + } 114 + 115 + static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) 116 + { 117 + int len; 118 + char intf[32]; 119 + struct rxe_dev *rxe; 120 + 121 + len = sanitize_arg(val, intf, sizeof(intf)); 122 + if (!len) { 123 + pr_err("rxe: add: invalid interface name\n"); 124 + return -EINVAL; 125 + } 126 + 127 + if (strncmp("all", intf, len) == 0) { 128 + pr_info("rxe_sys: remove all"); 129 + 
rxe_remove_all(); 130 + return 0; 131 + } 132 + 133 + rxe = get_rxe_by_name(intf); 134 + 135 + if (!rxe) { 136 + pr_err("rxe: not configured on %s\n", intf); 137 + return -EINVAL; 138 + } 139 + 140 + list_del(&rxe->list); 141 + rxe_remove(rxe); 142 + 143 + return 0; 144 + } 145 + 146 + static const struct kernel_param_ops rxe_add_ops = { 147 + .set = rxe_param_set_add, 148 + }; 149 + 150 + static const struct kernel_param_ops rxe_remove_ops = { 151 + .set = rxe_param_set_remove, 152 + }; 153 + 154 + module_param_cb(add, &rxe_add_ops, NULL, 0200); 155 + MODULE_PARM_DESC(add, "Create RXE device over network interface"); 156 + module_param_cb(remove, &rxe_remove_ops, NULL, 0200); 157 + MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
+154
drivers/infiniband/sw/rxe/rxe_task.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include <linux/kernel.h> 35 + #include <linux/interrupt.h> 36 + #include <linux/hardirq.h> 37 + 38 + #include "rxe_task.h" 39 + 40 + int __rxe_do_task(struct rxe_task *task) 41 + 42 + { 43 + int ret; 44 + 45 + while ((ret = task->func(task->arg)) == 0) 46 + ; 47 + 48 + task->ret = ret; 49 + 50 + return ret; 51 + } 52 + 53 + /* 54 + * this locking is due to a potential race where 55 + * a second caller finds the task already running 56 + * but looks just after the last call to func 57 + */ 58 + void rxe_do_task(unsigned long data) 59 + { 60 + int cont; 61 + int ret; 62 + unsigned long flags; 63 + struct rxe_task *task = (struct rxe_task *)data; 64 + 65 + spin_lock_irqsave(&task->state_lock, flags); 66 + switch (task->state) { 67 + case TASK_STATE_START: 68 + task->state = TASK_STATE_BUSY; 69 + spin_unlock_irqrestore(&task->state_lock, flags); 70 + break; 71 + 72 + case TASK_STATE_BUSY: 73 + task->state = TASK_STATE_ARMED; 74 + /* fall through to */ 75 + case TASK_STATE_ARMED: 76 + spin_unlock_irqrestore(&task->state_lock, flags); 77 + return; 78 + 79 + default: 80 + spin_unlock_irqrestore(&task->state_lock, flags); 81 + pr_warn("bad state = %d in rxe_do_task\n", task->state); 82 + return; 83 + } 84 + 85 + do { 86 + cont = 0; 87 + ret = task->func(task->arg); 88 + 89 + spin_lock_irqsave(&task->state_lock, flags); 90 + switch (task->state) { 91 + case TASK_STATE_BUSY: 92 + if (ret) 93 + task->state = TASK_STATE_START; 94 + else 95 + cont = 1; 96 + break; 97 + 98 + /* soneone tried to run the task since the last time we called 99 + * func, so we will call one more time regardless of the 100 + * return value 101 + */ 102 + case TASK_STATE_ARMED: 103 + task->state = TASK_STATE_BUSY; 104 + cont = 1; 105 + break; 106 + 107 + default: 108 + pr_warn("bad state = %d in rxe_do_task\n", 109 + task->state); 110 + } 111 + spin_unlock_irqrestore(&task->state_lock, flags); 112 + } while (cont); 113 + 114 + task->ret = ret; 115 + } 116 + 117 + int 
rxe_init_task(void *obj, struct rxe_task *task, 118 + void *arg, int (*func)(void *), char *name) 119 + { 120 + task->obj = obj; 121 + task->arg = arg; 122 + task->func = func; 123 + snprintf(task->name, sizeof(task->name), "%s", name); 124 + 125 + tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); 126 + 127 + task->state = TASK_STATE_START; 128 + spin_lock_init(&task->state_lock); 129 + 130 + return 0; 131 + } 132 + 133 + void rxe_cleanup_task(struct rxe_task *task) 134 + { 135 + tasklet_kill(&task->tasklet); 136 + } 137 + 138 + void rxe_run_task(struct rxe_task *task, int sched) 139 + { 140 + if (sched) 141 + tasklet_schedule(&task->tasklet); 142 + else 143 + rxe_do_task((unsigned long)task); 144 + } 145 + 146 + void rxe_disable_task(struct rxe_task *task) 147 + { 148 + tasklet_disable(&task->tasklet); 149 + } 150 + 151 + void rxe_enable_task(struct rxe_task *task) 152 + { 153 + tasklet_enable(&task->tasklet); 154 + }
+95
drivers/infiniband/sw/rxe/rxe_task.h
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + */ 33 + 34 + #ifndef RXE_TASK_H 35 + #define RXE_TASK_H 36 + 37 + enum { 38 + TASK_STATE_START = 0, 39 + TASK_STATE_BUSY = 1, 40 + TASK_STATE_ARMED = 2, 41 + }; 42 + 43 + /* 44 + * data structure to describe a 'task' which is a short 45 + * function that returns 0 as long as it needs to be 46 + * called again. 
47 + */ 48 + struct rxe_task { 49 + void *obj; 50 + struct tasklet_struct tasklet; 51 + int state; 52 + spinlock_t state_lock; /* spinlock for task state */ 53 + void *arg; 54 + int (*func)(void *arg); 55 + int ret; 56 + char name[16]; 57 + }; 58 + 59 + /* 60 + * init rxe_task structure 61 + * arg => parameter to pass to fcn 62 + * fcn => function to call until it returns != 0 63 + */ 64 + int rxe_init_task(void *obj, struct rxe_task *task, 65 + void *arg, int (*func)(void *), char *name); 66 + 67 + /* cleanup task */ 68 + void rxe_cleanup_task(struct rxe_task *task); 69 + 70 + /* 71 + * raw call to func in loop without any checking 72 + * can call when tasklets are disabled 73 + */ 74 + int __rxe_do_task(struct rxe_task *task); 75 + 76 + /* 77 + * common function called by any of the main tasklets 78 + * If there is any chance that there is additional 79 + * work to do someone must reschedule the task before 80 + * leaving 81 + */ 82 + void rxe_do_task(unsigned long data); 83 + 84 + /* run a task, else schedule it to run as a tasklet, The decision 85 + * to run or schedule tasklet is based on the parameter sched. 86 + */ 87 + void rxe_run_task(struct rxe_task *task, int sched); 88 + 89 + /* keep a task from scheduling */ 90 + void rxe_disable_task(struct rxe_task *task); 91 + 92 + /* allow task to run */ 93 + void rxe_enable_task(struct rxe_task *task); 94 + 95 + #endif /* RXE_TASK_H */
+1330
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 1 + /* 2 + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 3 + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #include "rxe.h" 35 + #include "rxe_loc.h" 36 + #include "rxe_queue.h" 37 + 38 + static int rxe_query_device(struct ib_device *dev, 39 + struct ib_device_attr *attr, 40 + struct ib_udata *uhw) 41 + { 42 + struct rxe_dev *rxe = to_rdev(dev); 43 + 44 + if (uhw->inlen || uhw->outlen) 45 + return -EINVAL; 46 + 47 + *attr = rxe->attr; 48 + return 0; 49 + } 50 + 51 + static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed, 52 + u8 *active_width) 53 + { 54 + if (speed <= 1000) { 55 + *active_width = IB_WIDTH_1X; 56 + *active_speed = IB_SPEED_SDR; 57 + } else if (speed <= 10000) { 58 + *active_width = IB_WIDTH_1X; 59 + *active_speed = IB_SPEED_FDR10; 60 + } else if (speed <= 20000) { 61 + *active_width = IB_WIDTH_4X; 62 + *active_speed = IB_SPEED_DDR; 63 + } else if (speed <= 30000) { 64 + *active_width = IB_WIDTH_4X; 65 + *active_speed = IB_SPEED_QDR; 66 + } else if (speed <= 40000) { 67 + *active_width = IB_WIDTH_4X; 68 + *active_speed = IB_SPEED_FDR10; 69 + } else { 70 + *active_width = IB_WIDTH_4X; 71 + *active_speed = IB_SPEED_EDR; 72 + } 73 + } 74 + 75 + static int rxe_query_port(struct ib_device *dev, 76 + u8 port_num, struct ib_port_attr *attr) 77 + { 78 + struct rxe_dev *rxe = to_rdev(dev); 79 + struct rxe_port *port; 80 + u32 speed; 81 + 82 + if (unlikely(port_num != 1)) { 83 + pr_warn("invalid port_number %d\n", port_num); 84 + goto err1; 85 + } 86 + 87 + port = &rxe->port; 88 + 89 + *attr = port->attr; 90 + 91 + mutex_lock(&rxe->usdev_lock); 92 + if (rxe->ndev->ethtool_ops->get_link_ksettings) { 93 + struct ethtool_link_ksettings ks; 94 + 95 + rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks); 96 + speed = ks.base.speed; 97 + } else if (rxe->ndev->ethtool_ops->get_settings) { 98 + struct ethtool_cmd cmd; 99 + 100 + rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); 101 + speed = cmd.speed; 102 + } else { 103 + pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); 104 + speed = 1000; 105 + } 106 + 
rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); 107 + mutex_unlock(&rxe->usdev_lock); 108 + 109 + return 0; 110 + 111 + err1: 112 + return -EINVAL; 113 + } 114 + 115 + static int rxe_query_gid(struct ib_device *device, 116 + u8 port_num, int index, union ib_gid *gid) 117 + { 118 + int ret; 119 + 120 + if (index > RXE_PORT_GID_TBL_LEN) 121 + return -EINVAL; 122 + 123 + ret = ib_get_cached_gid(device, port_num, index, gid, NULL); 124 + if (ret == -EAGAIN) { 125 + memcpy(gid, &zgid, sizeof(*gid)); 126 + return 0; 127 + } 128 + 129 + return ret; 130 + } 131 + 132 + static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int 133 + index, const union ib_gid *gid, 134 + const struct ib_gid_attr *attr, void **context) 135 + { 136 + if (index >= RXE_PORT_GID_TBL_LEN) 137 + return -EINVAL; 138 + return 0; 139 + } 140 + 141 + static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int 142 + index, void **context) 143 + { 144 + if (index >= RXE_PORT_GID_TBL_LEN) 145 + return -EINVAL; 146 + return 0; 147 + } 148 + 149 + static struct net_device *rxe_get_netdev(struct ib_device *device, 150 + u8 port_num) 151 + { 152 + struct rxe_dev *rxe = to_rdev(device); 153 + 154 + if (rxe->ndev) { 155 + dev_hold(rxe->ndev); 156 + return rxe->ndev; 157 + } 158 + 159 + return NULL; 160 + } 161 + 162 + static int rxe_query_pkey(struct ib_device *device, 163 + u8 port_num, u16 index, u16 *pkey) 164 + { 165 + struct rxe_dev *rxe = to_rdev(device); 166 + struct rxe_port *port; 167 + 168 + if (unlikely(port_num != 1)) { 169 + dev_warn(device->dma_device, "invalid port_num = %d\n", 170 + port_num); 171 + goto err1; 172 + } 173 + 174 + port = &rxe->port; 175 + 176 + if (unlikely(index >= port->attr.pkey_tbl_len)) { 177 + dev_warn(device->dma_device, "invalid index = %d\n", 178 + index); 179 + goto err1; 180 + } 181 + 182 + *pkey = port->pkey_tbl[index]; 183 + return 0; 184 + 185 + err1: 186 + return -EINVAL; 187 + } 188 + 189 + static int 
rxe_modify_device(struct ib_device *dev, 190 + int mask, struct ib_device_modify *attr) 191 + { 192 + struct rxe_dev *rxe = to_rdev(dev); 193 + 194 + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) 195 + rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); 196 + 197 + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { 198 + memcpy(rxe->ib_dev.node_desc, 199 + attr->node_desc, sizeof(rxe->ib_dev.node_desc)); 200 + } 201 + 202 + return 0; 203 + } 204 + 205 + static int rxe_modify_port(struct ib_device *dev, 206 + u8 port_num, int mask, struct ib_port_modify *attr) 207 + { 208 + struct rxe_dev *rxe = to_rdev(dev); 209 + struct rxe_port *port; 210 + 211 + if (unlikely(port_num != 1)) { 212 + pr_warn("invalid port_num = %d\n", port_num); 213 + goto err1; 214 + } 215 + 216 + port = &rxe->port; 217 + 218 + port->attr.port_cap_flags |= attr->set_port_cap_mask; 219 + port->attr.port_cap_flags &= ~attr->clr_port_cap_mask; 220 + 221 + if (mask & IB_PORT_RESET_QKEY_CNTR) 222 + port->attr.qkey_viol_cntr = 0; 223 + 224 + return 0; 225 + 226 + err1: 227 + return -EINVAL; 228 + } 229 + 230 + static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, 231 + u8 port_num) 232 + { 233 + struct rxe_dev *rxe = to_rdev(dev); 234 + 235 + return rxe->ifc_ops->link_layer(rxe, port_num); 236 + } 237 + 238 + static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev, 239 + struct ib_udata *udata) 240 + { 241 + struct rxe_dev *rxe = to_rdev(dev); 242 + struct rxe_ucontext *uc; 243 + 244 + uc = rxe_alloc(&rxe->uc_pool); 245 + return uc ? 
&uc->ibuc : ERR_PTR(-ENOMEM); 246 + } 247 + 248 + static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc) 249 + { 250 + struct rxe_ucontext *uc = to_ruc(ibuc); 251 + 252 + rxe_drop_ref(uc); 253 + return 0; 254 + } 255 + 256 + static int rxe_port_immutable(struct ib_device *dev, u8 port_num, 257 + struct ib_port_immutable *immutable) 258 + { 259 + int err; 260 + struct ib_port_attr attr; 261 + 262 + err = rxe_query_port(dev, port_num, &attr); 263 + if (err) 264 + return err; 265 + 266 + immutable->pkey_tbl_len = attr.pkey_tbl_len; 267 + immutable->gid_tbl_len = attr.gid_tbl_len; 268 + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 269 + immutable->max_mad_size = IB_MGMT_MAD_SIZE; 270 + 271 + return 0; 272 + } 273 + 274 + static struct ib_pd *rxe_alloc_pd(struct ib_device *dev, 275 + struct ib_ucontext *context, 276 + struct ib_udata *udata) 277 + { 278 + struct rxe_dev *rxe = to_rdev(dev); 279 + struct rxe_pd *pd; 280 + 281 + pd = rxe_alloc(&rxe->pd_pool); 282 + return pd ? &pd->ibpd : ERR_PTR(-ENOMEM); 283 + } 284 + 285 + static int rxe_dealloc_pd(struct ib_pd *ibpd) 286 + { 287 + struct rxe_pd *pd = to_rpd(ibpd); 288 + 289 + rxe_drop_ref(pd); 290 + return 0; 291 + } 292 + 293 + static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, 294 + struct rxe_av *av) 295 + { 296 + int err; 297 + union ib_gid sgid; 298 + struct ib_gid_attr sgid_attr; 299 + 300 + err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num, 301 + attr->grh.sgid_index, &sgid, 302 + &sgid_attr); 303 + if (err) { 304 + pr_err("Failed to query sgid. 
err = %d\n", err); 305 + return err; 306 + } 307 + 308 + err = rxe_av_from_attr(rxe, attr->port_num, av, attr); 309 + if (!err) 310 + err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid); 311 + 312 + if (sgid_attr.ndev) 313 + dev_put(sgid_attr.ndev); 314 + return err; 315 + } 316 + 317 + static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) 318 + { 319 + int err; 320 + struct rxe_dev *rxe = to_rdev(ibpd->device); 321 + struct rxe_pd *pd = to_rpd(ibpd); 322 + struct rxe_ah *ah; 323 + 324 + err = rxe_av_chk_attr(rxe, attr); 325 + if (err) 326 + goto err1; 327 + 328 + ah = rxe_alloc(&rxe->ah_pool); 329 + if (!ah) { 330 + err = -ENOMEM; 331 + goto err1; 332 + } 333 + 334 + rxe_add_ref(pd); 335 + ah->pd = pd; 336 + 337 + err = rxe_init_av(rxe, attr, &ah->av); 338 + if (err) 339 + goto err2; 340 + 341 + return &ah->ibah; 342 + 343 + err2: 344 + rxe_drop_ref(pd); 345 + rxe_drop_ref(ah); 346 + err1: 347 + return ERR_PTR(err); 348 + } 349 + 350 + static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) 351 + { 352 + int err; 353 + struct rxe_dev *rxe = to_rdev(ibah->device); 354 + struct rxe_ah *ah = to_rah(ibah); 355 + 356 + err = rxe_av_chk_attr(rxe, attr); 357 + if (err) 358 + return err; 359 + 360 + err = rxe_init_av(rxe, attr, &ah->av); 361 + if (err) 362 + return err; 363 + 364 + return 0; 365 + } 366 + 367 + static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) 368 + { 369 + struct rxe_dev *rxe = to_rdev(ibah->device); 370 + struct rxe_ah *ah = to_rah(ibah); 371 + 372 + rxe_av_to_attr(rxe, &ah->av, attr); 373 + return 0; 374 + } 375 + 376 + static int rxe_destroy_ah(struct ib_ah *ibah) 377 + { 378 + struct rxe_ah *ah = to_rah(ibah); 379 + 380 + rxe_drop_ref(ah->pd); 381 + rxe_drop_ref(ah); 382 + return 0; 383 + } 384 + 385 + static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) 386 + { 387 + int err; 388 + int i; 389 + u32 length; 390 + struct rxe_recv_wqe *recv_wqe; 391 + int num_sge = 
ibwr->num_sge; 392 + 393 + if (unlikely(queue_full(rq->queue))) { 394 + err = -ENOMEM; 395 + goto err1; 396 + } 397 + 398 + if (unlikely(num_sge > rq->max_sge)) { 399 + err = -EINVAL; 400 + goto err1; 401 + } 402 + 403 + length = 0; 404 + for (i = 0; i < num_sge; i++) 405 + length += ibwr->sg_list[i].length; 406 + 407 + recv_wqe = producer_addr(rq->queue); 408 + recv_wqe->wr_id = ibwr->wr_id; 409 + recv_wqe->num_sge = num_sge; 410 + 411 + memcpy(recv_wqe->dma.sge, ibwr->sg_list, 412 + num_sge * sizeof(struct ib_sge)); 413 + 414 + recv_wqe->dma.length = length; 415 + recv_wqe->dma.resid = length; 416 + recv_wqe->dma.num_sge = num_sge; 417 + recv_wqe->dma.cur_sge = 0; 418 + recv_wqe->dma.sge_offset = 0; 419 + 420 + /* make sure all changes to the work queue are written before we 421 + * update the producer pointer 422 + */ 423 + smp_wmb(); 424 + 425 + advance_producer(rq->queue); 426 + return 0; 427 + 428 + err1: 429 + return err; 430 + } 431 + 432 + static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, 433 + struct ib_srq_init_attr *init, 434 + struct ib_udata *udata) 435 + { 436 + int err; 437 + struct rxe_dev *rxe = to_rdev(ibpd->device); 438 + struct rxe_pd *pd = to_rpd(ibpd); 439 + struct rxe_srq *srq; 440 + struct ib_ucontext *context = udata ? 
ibpd->uobject->context : NULL; 441 + 442 + err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); 443 + if (err) 444 + goto err1; 445 + 446 + srq = rxe_alloc(&rxe->srq_pool); 447 + if (!srq) { 448 + err = -ENOMEM; 449 + goto err1; 450 + } 451 + 452 + rxe_add_index(srq); 453 + rxe_add_ref(pd); 454 + srq->pd = pd; 455 + 456 + err = rxe_srq_from_init(rxe, srq, init, context, udata); 457 + if (err) 458 + goto err2; 459 + 460 + return &srq->ibsrq; 461 + 462 + err2: 463 + rxe_drop_ref(pd); 464 + rxe_drop_index(srq); 465 + rxe_drop_ref(srq); 466 + err1: 467 + return ERR_PTR(err); 468 + } 469 + 470 + static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 471 + enum ib_srq_attr_mask mask, 472 + struct ib_udata *udata) 473 + { 474 + int err; 475 + struct rxe_srq *srq = to_rsrq(ibsrq); 476 + struct rxe_dev *rxe = to_rdev(ibsrq->device); 477 + 478 + err = rxe_srq_chk_attr(rxe, srq, attr, mask); 479 + if (err) 480 + goto err1; 481 + 482 + err = rxe_srq_from_attr(rxe, srq, attr, mask, udata); 483 + if (err) 484 + goto err1; 485 + 486 + return 0; 487 + 488 + err1: 489 + return err; 490 + } 491 + 492 + static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) 493 + { 494 + struct rxe_srq *srq = to_rsrq(ibsrq); 495 + 496 + if (srq->error) 497 + return -EINVAL; 498 + 499 + attr->max_wr = srq->rq.queue->buf->index_mask; 500 + attr->max_sge = srq->rq.max_sge; 501 + attr->srq_limit = srq->limit; 502 + return 0; 503 + } 504 + 505 + static int rxe_destroy_srq(struct ib_srq *ibsrq) 506 + { 507 + struct rxe_srq *srq = to_rsrq(ibsrq); 508 + 509 + if (srq->rq.queue) 510 + rxe_queue_cleanup(srq->rq.queue); 511 + 512 + rxe_drop_ref(srq->pd); 513 + rxe_drop_index(srq); 514 + rxe_drop_ref(srq); 515 + 516 + return 0; 517 + } 518 + 519 + static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 520 + struct ib_recv_wr **bad_wr) 521 + { 522 + int err = 0; 523 + unsigned long flags; 524 + struct rxe_srq *srq = to_rsrq(ibsrq); 525 + 526 + 
spin_lock_irqsave(&srq->rq.producer_lock, flags); 527 + 528 + while (wr) { 529 + err = post_one_recv(&srq->rq, wr); 530 + if (unlikely(err)) 531 + break; 532 + wr = wr->next; 533 + } 534 + 535 + spin_unlock_irqrestore(&srq->rq.producer_lock, flags); 536 + 537 + if (err) 538 + *bad_wr = wr; 539 + 540 + return err; 541 + } 542 + 543 + static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, 544 + struct ib_qp_init_attr *init, 545 + struct ib_udata *udata) 546 + { 547 + int err; 548 + struct rxe_dev *rxe = to_rdev(ibpd->device); 549 + struct rxe_pd *pd = to_rpd(ibpd); 550 + struct rxe_qp *qp; 551 + 552 + err = rxe_qp_chk_init(rxe, init); 553 + if (err) 554 + goto err1; 555 + 556 + qp = rxe_alloc(&rxe->qp_pool); 557 + if (!qp) { 558 + err = -ENOMEM; 559 + goto err1; 560 + } 561 + 562 + if (udata) { 563 + if (udata->inlen) { 564 + err = -EINVAL; 565 + goto err1; 566 + } 567 + qp->is_user = 1; 568 + } 569 + 570 + rxe_add_index(qp); 571 + 572 + err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); 573 + if (err) 574 + goto err2; 575 + 576 + return &qp->ibqp; 577 + 578 + err2: 579 + rxe_drop_index(qp); 580 + rxe_drop_ref(qp); 581 + err1: 582 + return ERR_PTR(err); 583 + } 584 + 585 + static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 586 + int mask, struct ib_udata *udata) 587 + { 588 + int err; 589 + struct rxe_dev *rxe = to_rdev(ibqp->device); 590 + struct rxe_qp *qp = to_rqp(ibqp); 591 + 592 + err = rxe_qp_chk_attr(rxe, qp, attr, mask); 593 + if (err) 594 + goto err1; 595 + 596 + err = rxe_qp_from_attr(qp, attr, mask, udata); 597 + if (err) 598 + goto err1; 599 + 600 + return 0; 601 + 602 + err1: 603 + return err; 604 + } 605 + 606 + static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 607 + int mask, struct ib_qp_init_attr *init) 608 + { 609 + struct rxe_qp *qp = to_rqp(ibqp); 610 + 611 + rxe_qp_to_init(qp, init); 612 + rxe_qp_to_attr(qp, attr, mask); 613 + 614 + return 0; 615 + } 616 + 617 + static int rxe_destroy_qp(struct ib_qp 
*ibqp) 618 + { 619 + struct rxe_qp *qp = to_rqp(ibqp); 620 + 621 + rxe_qp_destroy(qp); 622 + rxe_drop_index(qp); 623 + rxe_drop_ref(qp); 624 + return 0; 625 + } 626 + 627 + static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, 628 + unsigned int mask, unsigned int length) 629 + { 630 + int num_sge = ibwr->num_sge; 631 + struct rxe_sq *sq = &qp->sq; 632 + 633 + if (unlikely(num_sge > sq->max_sge)) 634 + goto err1; 635 + 636 + if (unlikely(mask & WR_ATOMIC_MASK)) { 637 + if (length < 8) 638 + goto err1; 639 + 640 + if (atomic_wr(ibwr)->remote_addr & 0x7) 641 + goto err1; 642 + } 643 + 644 + if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && 645 + (length > sq->max_inline))) 646 + goto err1; 647 + 648 + return 0; 649 + 650 + err1: 651 + return -EINVAL; 652 + } 653 + 654 + static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, 655 + struct ib_send_wr *ibwr) 656 + { 657 + wr->wr_id = ibwr->wr_id; 658 + wr->num_sge = ibwr->num_sge; 659 + wr->opcode = ibwr->opcode; 660 + wr->send_flags = ibwr->send_flags; 661 + 662 + if (qp_type(qp) == IB_QPT_UD || 663 + qp_type(qp) == IB_QPT_SMI || 664 + qp_type(qp) == IB_QPT_GSI) { 665 + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; 666 + wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; 667 + if (qp_type(qp) == IB_QPT_GSI) 668 + wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; 669 + if (wr->opcode == IB_WR_SEND_WITH_IMM) 670 + wr->ex.imm_data = ibwr->ex.imm_data; 671 + } else { 672 + switch (wr->opcode) { 673 + case IB_WR_RDMA_WRITE_WITH_IMM: 674 + wr->ex.imm_data = ibwr->ex.imm_data; 675 + case IB_WR_RDMA_READ: 676 + case IB_WR_RDMA_WRITE: 677 + wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; 678 + wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; 679 + break; 680 + case IB_WR_SEND_WITH_IMM: 681 + wr->ex.imm_data = ibwr->ex.imm_data; 682 + break; 683 + case IB_WR_SEND_WITH_INV: 684 + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; 685 + break; 686 + case IB_WR_ATOMIC_CMP_AND_SWP: 687 + case 
IB_WR_ATOMIC_FETCH_AND_ADD: 688 + wr->wr.atomic.remote_addr = 689 + atomic_wr(ibwr)->remote_addr; 690 + wr->wr.atomic.compare_add = 691 + atomic_wr(ibwr)->compare_add; 692 + wr->wr.atomic.swap = atomic_wr(ibwr)->swap; 693 + wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey; 694 + break; 695 + case IB_WR_LOCAL_INV: 696 + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; 697 + break; 698 + case IB_WR_REG_MR: 699 + wr->wr.reg.mr = reg_wr(ibwr)->mr; 700 + wr->wr.reg.key = reg_wr(ibwr)->key; 701 + wr->wr.reg.access = reg_wr(ibwr)->access; 702 + break; 703 + default: 704 + break; 705 + } 706 + } 707 + } 708 + 709 + static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, 710 + unsigned int mask, unsigned int length, 711 + struct rxe_send_wqe *wqe) 712 + { 713 + int num_sge = ibwr->num_sge; 714 + struct ib_sge *sge; 715 + int i; 716 + u8 *p; 717 + 718 + init_send_wr(qp, &wqe->wr, ibwr); 719 + 720 + if (qp_type(qp) == IB_QPT_UD || 721 + qp_type(qp) == IB_QPT_SMI || 722 + qp_type(qp) == IB_QPT_GSI) 723 + memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); 724 + 725 + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { 726 + p = wqe->dma.inline_data; 727 + 728 + sge = ibwr->sg_list; 729 + for (i = 0; i < num_sge; i++, sge++) { 730 + if (qp->is_user && copy_from_user(p, (__user void *) 731 + (uintptr_t)sge->addr, sge->length)) 732 + return -EFAULT; 733 + 734 + else if (!qp->is_user) 735 + memcpy(p, (void *)(uintptr_t)sge->addr, 736 + sge->length); 737 + 738 + p += sge->length; 739 + } 740 + } else if (mask & WR_REG_MASK) { 741 + wqe->mask = mask; 742 + wqe->state = wqe_state_posted; 743 + return 0; 744 + } else 745 + memcpy(wqe->dma.sge, ibwr->sg_list, 746 + num_sge * sizeof(struct ib_sge)); 747 + 748 + wqe->iova = (mask & WR_ATOMIC_MASK) ? 
749 + atomic_wr(ibwr)->remote_addr : 750 + rdma_wr(ibwr)->remote_addr; 751 + wqe->mask = mask; 752 + wqe->dma.length = length; 753 + wqe->dma.resid = length; 754 + wqe->dma.num_sge = num_sge; 755 + wqe->dma.cur_sge = 0; 756 + wqe->dma.sge_offset = 0; 757 + wqe->state = wqe_state_posted; 758 + wqe->ssn = atomic_add_return(1, &qp->ssn); 759 + 760 + return 0; 761 + } 762 + 763 + static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, 764 + unsigned mask, u32 length) 765 + { 766 + int err; 767 + struct rxe_sq *sq = &qp->sq; 768 + struct rxe_send_wqe *send_wqe; 769 + unsigned long flags; 770 + 771 + err = validate_send_wr(qp, ibwr, mask, length); 772 + if (err) 773 + return err; 774 + 775 + spin_lock_irqsave(&qp->sq.sq_lock, flags); 776 + 777 + if (unlikely(queue_full(sq->queue))) { 778 + err = -ENOMEM; 779 + goto err1; 780 + } 781 + 782 + send_wqe = producer_addr(sq->queue); 783 + 784 + err = init_send_wqe(qp, ibwr, mask, length, send_wqe); 785 + if (unlikely(err)) 786 + goto err1; 787 + 788 + /* 789 + * make sure all changes to the work queue are 790 + * written before we update the producer pointer 791 + */ 792 + smp_wmb(); 793 + 794 + advance_producer(sq->queue); 795 + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); 796 + 797 + return 0; 798 + 799 + err1: 800 + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); 801 + return err; 802 + } 803 + 804 + static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 805 + struct ib_send_wr **bad_wr) 806 + { 807 + int err = 0; 808 + struct rxe_qp *qp = to_rqp(ibqp); 809 + unsigned int mask; 810 + unsigned int length = 0; 811 + int i; 812 + int must_sched; 813 + 814 + if (unlikely(!qp->valid)) { 815 + *bad_wr = wr; 816 + return -EINVAL; 817 + } 818 + 819 + if (unlikely(qp->req.state < QP_STATE_READY)) { 820 + *bad_wr = wr; 821 + return -EINVAL; 822 + } 823 + 824 + while (wr) { 825 + mask = wr_opcode_mask(wr->opcode, qp); 826 + if (unlikely(!mask)) { 827 + err = -EINVAL; 828 + *bad_wr = wr; 829 + break; 830 + 
} 831 + 832 + if (unlikely((wr->send_flags & IB_SEND_INLINE) && 833 + !(mask & WR_INLINE_MASK))) { 834 + err = -EINVAL; 835 + *bad_wr = wr; 836 + break; 837 + } 838 + 839 + length = 0; 840 + for (i = 0; i < wr->num_sge; i++) 841 + length += wr->sg_list[i].length; 842 + 843 + err = post_one_send(qp, wr, mask, length); 844 + 845 + if (err) { 846 + *bad_wr = wr; 847 + break; 848 + } 849 + wr = wr->next; 850 + } 851 + 852 + /* 853 + * Must sched in case of GSI QP because ib_send_mad() hold irq lock, 854 + * and the requester call ip_local_out_sk() that takes spin_lock_bh. 855 + */ 856 + must_sched = (qp_type(qp) == IB_QPT_GSI) || 857 + (queue_count(qp->sq.queue) > 1); 858 + 859 + rxe_run_task(&qp->req.task, must_sched); 860 + 861 + return err; 862 + } 863 + 864 + static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 865 + struct ib_recv_wr **bad_wr) 866 + { 867 + int err = 0; 868 + struct rxe_qp *qp = to_rqp(ibqp); 869 + struct rxe_rq *rq = &qp->rq; 870 + unsigned long flags; 871 + 872 + if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { 873 + *bad_wr = wr; 874 + err = -EINVAL; 875 + goto err1; 876 + } 877 + 878 + if (unlikely(qp->srq)) { 879 + *bad_wr = wr; 880 + err = -EINVAL; 881 + goto err1; 882 + } 883 + 884 + spin_lock_irqsave(&rq->producer_lock, flags); 885 + 886 + while (wr) { 887 + err = post_one_recv(rq, wr); 888 + if (unlikely(err)) { 889 + *bad_wr = wr; 890 + break; 891 + } 892 + wr = wr->next; 893 + } 894 + 895 + spin_unlock_irqrestore(&rq->producer_lock, flags); 896 + 897 + err1: 898 + return err; 899 + } 900 + 901 + static struct ib_cq *rxe_create_cq(struct ib_device *dev, 902 + const struct ib_cq_init_attr *attr, 903 + struct ib_ucontext *context, 904 + struct ib_udata *udata) 905 + { 906 + int err; 907 + struct rxe_dev *rxe = to_rdev(dev); 908 + struct rxe_cq *cq; 909 + 910 + if (attr->flags) 911 + return ERR_PTR(-EINVAL); 912 + 913 + err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata); 914 + if (err) 915 + goto 
err1; 916 + 917 + cq = rxe_alloc(&rxe->cq_pool); 918 + if (!cq) { 919 + err = -ENOMEM; 920 + goto err1; 921 + } 922 + 923 + err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, 924 + context, udata); 925 + if (err) 926 + goto err2; 927 + 928 + return &cq->ibcq; 929 + 930 + err2: 931 + rxe_drop_ref(cq); 932 + err1: 933 + return ERR_PTR(err); 934 + } 935 + 936 + static int rxe_destroy_cq(struct ib_cq *ibcq) 937 + { 938 + struct rxe_cq *cq = to_rcq(ibcq); 939 + 940 + rxe_drop_ref(cq); 941 + return 0; 942 + } 943 + 944 + static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) 945 + { 946 + int err; 947 + struct rxe_cq *cq = to_rcq(ibcq); 948 + struct rxe_dev *rxe = to_rdev(ibcq->device); 949 + 950 + err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata); 951 + if (err) 952 + goto err1; 953 + 954 + err = rxe_cq_resize_queue(cq, cqe, udata); 955 + if (err) 956 + goto err1; 957 + 958 + return 0; 959 + 960 + err1: 961 + return err; 962 + } 963 + 964 + static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 965 + { 966 + int i; 967 + struct rxe_cq *cq = to_rcq(ibcq); 968 + struct rxe_cqe *cqe; 969 + unsigned long flags; 970 + 971 + spin_lock_irqsave(&cq->cq_lock, flags); 972 + for (i = 0; i < num_entries; i++) { 973 + cqe = queue_head(cq->queue); 974 + if (!cqe) 975 + break; 976 + 977 + memcpy(wc++, &cqe->ibwc, sizeof(*wc)); 978 + advance_consumer(cq->queue); 979 + } 980 + spin_unlock_irqrestore(&cq->cq_lock, flags); 981 + 982 + return i; 983 + } 984 + 985 + static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) 986 + { 987 + struct rxe_cq *cq = to_rcq(ibcq); 988 + int count = queue_count(cq->queue); 989 + 990 + return (count > wc_cnt) ? 
wc_cnt : count; 991 + } 992 + 993 + static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 994 + { 995 + struct rxe_cq *cq = to_rcq(ibcq); 996 + 997 + if (cq->notify != IB_CQ_NEXT_COMP) 998 + cq->notify = flags & IB_CQ_SOLICITED_MASK; 999 + 1000 + return 0; 1001 + } 1002 + 1003 + static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) 1004 + { 1005 + struct rxe_dev *rxe = to_rdev(ibpd->device); 1006 + struct rxe_pd *pd = to_rpd(ibpd); 1007 + struct rxe_mem *mr; 1008 + int err; 1009 + 1010 + mr = rxe_alloc(&rxe->mr_pool); 1011 + if (!mr) { 1012 + err = -ENOMEM; 1013 + goto err1; 1014 + } 1015 + 1016 + rxe_add_index(mr); 1017 + 1018 + rxe_add_ref(pd); 1019 + 1020 + err = rxe_mem_init_dma(rxe, pd, access, mr); 1021 + if (err) 1022 + goto err2; 1023 + 1024 + return &mr->ibmr; 1025 + 1026 + err2: 1027 + rxe_drop_ref(pd); 1028 + rxe_drop_index(mr); 1029 + rxe_drop_ref(mr); 1030 + err1: 1031 + return ERR_PTR(err); 1032 + } 1033 + 1034 + static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, 1035 + u64 start, 1036 + u64 length, 1037 + u64 iova, 1038 + int access, struct ib_udata *udata) 1039 + { 1040 + int err; 1041 + struct rxe_dev *rxe = to_rdev(ibpd->device); 1042 + struct rxe_pd *pd = to_rpd(ibpd); 1043 + struct rxe_mem *mr; 1044 + 1045 + mr = rxe_alloc(&rxe->mr_pool); 1046 + if (!mr) { 1047 + err = -ENOMEM; 1048 + goto err2; 1049 + } 1050 + 1051 + rxe_add_index(mr); 1052 + 1053 + rxe_add_ref(pd); 1054 + 1055 + err = rxe_mem_init_user(rxe, pd, start, length, iova, 1056 + access, udata, mr); 1057 + if (err) 1058 + goto err3; 1059 + 1060 + return &mr->ibmr; 1061 + 1062 + err3: 1063 + rxe_drop_ref(pd); 1064 + rxe_drop_index(mr); 1065 + rxe_drop_ref(mr); 1066 + err2: 1067 + return ERR_PTR(err); 1068 + } 1069 + 1070 + static int rxe_dereg_mr(struct ib_mr *ibmr) 1071 + { 1072 + struct rxe_mem *mr = to_rmr(ibmr); 1073 + 1074 + mr->state = RXE_MEM_STATE_ZOMBIE; 1075 + rxe_drop_ref(mr->pd); 1076 + rxe_drop_index(mr); 1077 + 
rxe_drop_ref(mr); 1078 + return 0; 1079 + } 1080 + 1081 + static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, 1082 + enum ib_mr_type mr_type, 1083 + u32 max_num_sg) 1084 + { 1085 + struct rxe_dev *rxe = to_rdev(ibpd->device); 1086 + struct rxe_pd *pd = to_rpd(ibpd); 1087 + struct rxe_mem *mr; 1088 + int err; 1089 + 1090 + if (mr_type != IB_MR_TYPE_MEM_REG) 1091 + return ERR_PTR(-EINVAL); 1092 + 1093 + mr = rxe_alloc(&rxe->mr_pool); 1094 + if (!mr) { 1095 + err = -ENOMEM; 1096 + goto err1; 1097 + } 1098 + 1099 + rxe_add_index(mr); 1100 + 1101 + rxe_add_ref(pd); 1102 + 1103 + err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr); 1104 + if (err) 1105 + goto err2; 1106 + 1107 + return &mr->ibmr; 1108 + 1109 + err2: 1110 + rxe_drop_ref(pd); 1111 + rxe_drop_index(mr); 1112 + rxe_drop_ref(mr); 1113 + err1: 1114 + return ERR_PTR(err); 1115 + } 1116 + 1117 + static int rxe_set_page(struct ib_mr *ibmr, u64 addr) 1118 + { 1119 + struct rxe_mem *mr = to_rmr(ibmr); 1120 + struct rxe_map *map; 1121 + struct rxe_phys_buf *buf; 1122 + 1123 + if (unlikely(mr->nbuf == mr->num_buf)) 1124 + return -ENOMEM; 1125 + 1126 + map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; 1127 + buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; 1128 + 1129 + buf->addr = addr; 1130 + buf->size = ibmr->page_size; 1131 + mr->nbuf++; 1132 + 1133 + return 0; 1134 + } 1135 + 1136 + static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 1137 + unsigned int *sg_offset) 1138 + { 1139 + struct rxe_mem *mr = to_rmr(ibmr); 1140 + int n; 1141 + 1142 + mr->nbuf = 0; 1143 + 1144 + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); 1145 + 1146 + mr->va = ibmr->iova; 1147 + mr->iova = ibmr->iova; 1148 + mr->length = ibmr->length; 1149 + mr->page_shift = ilog2(ibmr->page_size); 1150 + mr->page_mask = ibmr->page_size - 1; 1151 + mr->offset = mr->iova & mr->page_mask; 1152 + 1153 + return n; 1154 + } 1155 + 1156 + static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) 1157 
+ { 1158 + int err; 1159 + struct rxe_dev *rxe = to_rdev(ibqp->device); 1160 + struct rxe_qp *qp = to_rqp(ibqp); 1161 + struct rxe_mc_grp *grp; 1162 + 1163 + /* takes a ref on grp if successful */ 1164 + err = rxe_mcast_get_grp(rxe, mgid, &grp); 1165 + if (err) 1166 + return err; 1167 + 1168 + err = rxe_mcast_add_grp_elem(rxe, qp, grp); 1169 + 1170 + rxe_drop_ref(grp); 1171 + return err; 1172 + } 1173 + 1174 + static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) 1175 + { 1176 + struct rxe_dev *rxe = to_rdev(ibqp->device); 1177 + struct rxe_qp *qp = to_rqp(ibqp); 1178 + 1179 + return rxe_mcast_drop_grp_elem(rxe, qp, mgid); 1180 + } 1181 + 1182 + static ssize_t rxe_show_parent(struct device *device, 1183 + struct device_attribute *attr, char *buf) 1184 + { 1185 + struct rxe_dev *rxe = container_of(device, struct rxe_dev, 1186 + ib_dev.dev); 1187 + char *name; 1188 + 1189 + name = rxe->ifc_ops->parent_name(rxe, 1); 1190 + return snprintf(buf, 16, "%s\n", name); 1191 + } 1192 + 1193 + static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL); 1194 + 1195 + static struct device_attribute *rxe_dev_attributes[] = { 1196 + &dev_attr_parent, 1197 + }; 1198 + 1199 + int rxe_register_device(struct rxe_dev *rxe) 1200 + { 1201 + int err; 1202 + int i; 1203 + struct ib_device *dev = &rxe->ib_dev; 1204 + 1205 + strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); 1206 + strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); 1207 + 1208 + dev->owner = THIS_MODULE; 1209 + dev->node_type = RDMA_NODE_IB_CA; 1210 + dev->phys_port_cnt = 1; 1211 + dev->num_comp_vectors = RXE_NUM_COMP_VECTORS; 1212 + dev->dma_device = rxe->ifc_ops->dma_device(rxe); 1213 + dev->local_dma_lkey = 0; 1214 + dev->node_guid = rxe->ifc_ops->node_guid(rxe); 1215 + dev->dma_ops = &rxe_dma_mapping_ops; 1216 + 1217 + dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; 1218 + dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) 1219 + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) 1220 + 
| BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) 1221 + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) 1222 + | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) 1223 + | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) 1224 + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) 1225 + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) 1226 + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) 1227 + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) 1228 + | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) 1229 + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) 1230 + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) 1231 + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) 1232 + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) 1233 + | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) 1234 + | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) 1235 + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) 1236 + | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) 1237 + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) 1238 + | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) 1239 + | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) 1240 + | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) 1241 + | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) 1242 + | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) 1243 + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) 1244 + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) 1245 + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) 1246 + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) 1247 + | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) 1248 + | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) 1249 + ; 1250 + 1251 + dev->query_device = rxe_query_device; 1252 + dev->modify_device = rxe_modify_device; 1253 + dev->query_port = rxe_query_port; 1254 + dev->modify_port = rxe_modify_port; 1255 + dev->get_link_layer = rxe_get_link_layer; 1256 + dev->query_gid = rxe_query_gid; 1257 + dev->get_netdev = rxe_get_netdev; 1258 + dev->add_gid = rxe_add_gid; 1259 + dev->del_gid = rxe_del_gid; 1260 + dev->query_pkey = rxe_query_pkey; 1261 + dev->alloc_ucontext = rxe_alloc_ucontext; 1262 + dev->dealloc_ucontext = rxe_dealloc_ucontext; 1263 + dev->mmap = rxe_mmap; 1264 + dev->get_port_immutable = rxe_port_immutable; 1265 + dev->alloc_pd = rxe_alloc_pd; 1266 
+ dev->dealloc_pd = rxe_dealloc_pd; 1267 + dev->create_ah = rxe_create_ah; 1268 + dev->modify_ah = rxe_modify_ah; 1269 + dev->query_ah = rxe_query_ah; 1270 + dev->destroy_ah = rxe_destroy_ah; 1271 + dev->create_srq = rxe_create_srq; 1272 + dev->modify_srq = rxe_modify_srq; 1273 + dev->query_srq = rxe_query_srq; 1274 + dev->destroy_srq = rxe_destroy_srq; 1275 + dev->post_srq_recv = rxe_post_srq_recv; 1276 + dev->create_qp = rxe_create_qp; 1277 + dev->modify_qp = rxe_modify_qp; 1278 + dev->query_qp = rxe_query_qp; 1279 + dev->destroy_qp = rxe_destroy_qp; 1280 + dev->post_send = rxe_post_send; 1281 + dev->post_recv = rxe_post_recv; 1282 + dev->create_cq = rxe_create_cq; 1283 + dev->destroy_cq = rxe_destroy_cq; 1284 + dev->resize_cq = rxe_resize_cq; 1285 + dev->poll_cq = rxe_poll_cq; 1286 + dev->peek_cq = rxe_peek_cq; 1287 + dev->req_notify_cq = rxe_req_notify_cq; 1288 + dev->get_dma_mr = rxe_get_dma_mr; 1289 + dev->reg_user_mr = rxe_reg_user_mr; 1290 + dev->dereg_mr = rxe_dereg_mr; 1291 + dev->alloc_mr = rxe_alloc_mr; 1292 + dev->map_mr_sg = rxe_map_mr_sg; 1293 + dev->attach_mcast = rxe_attach_mcast; 1294 + dev->detach_mcast = rxe_detach_mcast; 1295 + 1296 + err = ib_register_device(dev, NULL); 1297 + if (err) { 1298 + pr_warn("rxe_register_device failed, err = %d\n", err); 1299 + goto err1; 1300 + } 1301 + 1302 + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { 1303 + err = device_create_file(&dev->dev, rxe_dev_attributes[i]); 1304 + if (err) { 1305 + pr_warn("device_create_file failed, i = %d, err = %d\n", 1306 + i, err); 1307 + goto err2; 1308 + } 1309 + } 1310 + 1311 + return 0; 1312 + 1313 + err2: 1314 + ib_unregister_device(dev); 1315 + err1: 1316 + return err; 1317 + } 1318 + 1319 + int rxe_unregister_device(struct rxe_dev *rxe) 1320 + { 1321 + int i; 1322 + struct ib_device *dev = &rxe->ib_dev; 1323 + 1324 + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) 1325 + device_remove_file(&dev->dev, rxe_dev_attributes[i]); 1326 + 1327 + 
ib_unregister_device(dev); 1328 + 1329 + return 0; 1330 + }
+480
drivers/infiniband/sw/rxe/rxe_verbs.h
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Internal object definitions for the rxe soft-RoCE driver: the rxe_*
 * counterparts of the ib_* verbs objects, plus the interface-ops vtable
 * that connects the driver to the network layer.
 */

#ifndef RXE_VERBS_H
#define RXE_VERBS_H

#include <linux/interrupt.h>
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"

/* Nonzero iff the two P_Keys match: the low 15 bits must be equal and
 * nonzero, and at least one of the keys must be a full-membership key
 * (bit 15 set).
 */
static inline int pkey_match(u16 key1, u16 key2)
{
	return (((key1 & 0x7fff) != 0) &&
		((key1 & 0x7fff) == (key2 & 0x7fff)) &&
		((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0;
}

/* Return >0 if psn_a > psn_b
 *	   0 if psn_a == psn_b
 *	  <0 if psn_a < psn_b
 *
 * PSNs are 24 bits wide; shifting the difference left by 8 discards the
 * unused high byte so the signed comparison wraps correctly modulo 2^24.
 */
static inline int psn_compare(u32 psn_a, u32 psn_b)
{
	s32 diff;

	diff = (psn_a - psn_b) << 8;
	return diff;
}

struct rxe_ucontext {
	struct rxe_pool_entry	pelem;
	struct ib_ucontext	ibuc;
};

struct rxe_pd {
	struct rxe_pool_entry	pelem;
	struct ib_pd		ibpd;
};

struct rxe_ah {
	struct rxe_pool_entry	pelem;
	struct ib_ah		ibah;
	struct rxe_pd		*pd;
	struct rxe_av		av;
};

/* One completion entry, in either kernel (ib_wc) or userspace
 * (ib_uverbs_wc) layout.
 */
struct rxe_cqe {
	union {
		struct ib_wc		ibwc;
		struct ib_uverbs_wc	uibwc;
	};
};

struct rxe_cq {
	struct rxe_pool_entry	pelem;
	struct ib_cq		ibcq;
	struct rxe_queue	*queue;
	spinlock_t		cq_lock;
	u8			notify;
	int			is_user;
	struct tasklet_struct	comp_task;
};

/* Lifecycle states of a send WQE as it moves through the requester. */
enum wqe_state {
	wqe_state_posted,
	wqe_state_processing,
	wqe_state_pending,
	wqe_state_done,
	wqe_state_error,
};

struct rxe_sq {
	int			max_wr;
	int			max_sge;
	int			max_inline;
	spinlock_t		sq_lock; /* guard queue */
	struct rxe_queue	*queue;
};

struct rxe_rq {
	int			max_wr;
	int			max_sge;
	spinlock_t		producer_lock; /* guard queue producer */
	spinlock_t		consumer_lock; /* guard queue consumer */
	struct rxe_queue	*queue;
};

struct rxe_srq {
	struct rxe_pool_entry	pelem;
	struct ib_srq		ibsrq;
	struct rxe_pd		*pd;
	struct rxe_rq		rq;
	u32			srq_num;

	int			limit;
	int			error;
};

enum rxe_qp_state {
	QP_STATE_RESET,
	QP_STATE_INIT,
	QP_STATE_READY,
	QP_STATE_DRAIN,		/* req only */
	QP_STATE_DRAINED,	/* req only */
	QP_STATE_ERROR
};

/* printable names indexed by enum rxe_qp_state */
extern char *rxe_qp_state_name[];

/* Requester-side (send) state of a QP. */
struct rxe_req_info {
	enum rxe_qp_state	state;
	int			wqe_index;
	u32			psn;
	int			opcode;
	atomic_t		rd_atomic;
	int			wait_fence;
	int			need_rd_atomic;
	int			wait_psn;
	int			need_retry;
	int			noack_pkts;
	struct rxe_task		task;
};

/* Completer-side state of a QP (processes acks/timeouts for the SQ). */
struct rxe_comp_info {
	u32			psn;
	int			opcode;
	int			timeout;
	int			timeout_retry;
	u32			retry_cnt;
	u32			rnr_retry;
	struct rxe_task		task;
};

enum rdatm_res_state {
	rdatm_res_state_next,
	rdatm_res_state_new,
	rdatm_res_state_replay,
};

/* A saved responder resource for replaying RDMA read / atomic responses. */
struct resp_res {
	int			type;
	u32			first_psn;
	u32			last_psn;
	u32			cur_psn;
	enum rdatm_res_state	state;

	union {
		struct {
			struct sk_buff	*skb;
		} atomic;
		struct {
			struct rxe_mem	*mr;
			u64		va_org;
			u32		rkey;
			u32		length;
			u64		va;
			u32		resid;
		} read;
	};
};

/* Responder-side (receive) state of a QP. */
struct rxe_resp_info {
	enum rxe_qp_state	state;
	u32			msn;
	u32			psn;
	int			opcode;
	int			drop_msg;
	int			goto_error;
	int			sent_psn_nak;
	enum ib_wc_status	status;
	u8			aeth_syndrome;

	/* Receive only */
	struct rxe_recv_wqe	*wqe;

	/* RDMA read / atomic only */
	u64			va;
	struct rxe_mem		*mr;
	u32			resid;
	u32			rkey;
	u64			atomic_orig;

	/* SRQ only */
	struct {
		struct rxe_recv_wqe	wqe;
		struct ib_sge		sge[RXE_MAX_SGE];
	} srq_wqe;

	/* Responder resources. It's a circular list where the oldest
	 * resource is dropped first.
	 */
	struct resp_res		*resources;
	unsigned int		res_head;
	unsigned int		res_tail;
	struct resp_res		*res;
	struct rxe_task		task;
};

struct rxe_qp {
	struct rxe_pool_entry	pelem;
	struct ib_qp		ibqp;
	struct ib_qp_attr	attr;
	unsigned int		valid;
	unsigned int		mtu;
	int			is_user;

	struct rxe_pd		*pd;
	struct rxe_srq		*srq;
	struct rxe_cq		*scq;
	struct rxe_cq		*rcq;

	enum ib_sig_type	sq_sig_type;

	struct rxe_sq		sq;
	struct rxe_rq		rq;

	struct socket		*sk;

	struct rxe_av		pri_av;
	struct rxe_av		alt_av;

	/* list of mcast groups qp has joined (for cleanup) */
	struct list_head	grp_list;
	spinlock_t		grp_lock; /* guard grp_list */

	struct sk_buff_head	req_pkts;
	struct sk_buff_head	resp_pkts;
	struct sk_buff_head	send_pkts;

	struct rxe_req_info	req;
	struct rxe_comp_info	comp;
	struct rxe_resp_info	resp;

	atomic_t		ssn;
	atomic_t		skb_out;
	int			need_req_skb;

	/* Timer for retranmitting packet when ACKs have been lost. RC
	 * only. The requester sets it when it is not already
	 * started. The responder resets it whenever an ack is
	 * received.
	 */
	struct timer_list retrans_timer;
	u64 qp_timeout_jiffies;

	/* Timer for handling RNR NAKS. */
	struct timer_list rnr_nak_timer;

	spinlock_t		state_lock; /* guard requester and completer */
};

enum rxe_mem_state {
	RXE_MEM_STATE_ZOMBIE,
	RXE_MEM_STATE_INVALID,
	RXE_MEM_STATE_FREE,
	RXE_MEM_STATE_VALID,
};

enum rxe_mem_type {
	RXE_MEM_TYPE_NONE,
	RXE_MEM_TYPE_DMA,
	RXE_MEM_TYPE_MR,
	RXE_MEM_TYPE_FMR,
	RXE_MEM_TYPE_MW,
};

/* number of rxe_phys_buf entries that fit in one page-sized rxe_map */
#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))

struct rxe_phys_buf {
	u64	addr;
	u64	size;
};

struct rxe_map {
	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
};

/* Memory region / memory window; the union mirrors whichever ib object
 * this rxe_mem is backing.
 */
struct rxe_mem {
	struct rxe_pool_entry	pelem;
	union {
		struct ib_mr		ibmr;
		struct ib_mw		ibmw;
	};

	struct rxe_pd		*pd;
	struct ib_umem		*umem;

	u32			lkey;
	u32			rkey;

	enum rxe_mem_state	state;
	enum rxe_mem_type	type;
	u64			va;
	u64			iova;
	size_t			length;
	u32			offset;
	int			access;

	int			page_shift;
	int			page_mask;
	int			map_shift;
	int			map_mask;

	u32			num_buf;
	u32			nbuf;

	u32			max_buf;
	u32			num_map;

	struct rxe_map		**map;
};

struct rxe_mc_grp {
	struct rxe_pool_entry	pelem;
	spinlock_t		mcg_lock; /* guard group */
	struct rxe_dev		*rxe;
	struct list_head	qp_list;
	union ib_gid		mgid;
	int			num_qp;
	u32			qkey;
	u16			pkey;
};

/* Membership link between one QP and one multicast group. */
struct rxe_mc_elem {
	struct rxe_pool_entry	pelem;
	struct list_head	qp_list;
	struct list_head	grp_list;
	struct rxe_qp		*qp;
	struct rxe_mc_grp	*grp;
};

struct rxe_port {
	struct ib_port_attr	attr;
	u16			*pkey_tbl;
	__be64			port_guid;
	__be64			subnet_prefix;
	spinlock_t		port_lock; /* guard port */
	unsigned int		mtu_cap;
	/* special QPs */
	u32			qp_smi_index;
	u32			qp_gsi_index;
};

/* callbacks from rdma_rxe to network interface layer */
struct rxe_ifc_ops {
	void (*release)(struct rxe_dev *rxe);
	__be64 (*node_guid)(struct rxe_dev *rxe);
	__be64 (*port_guid)(struct rxe_dev *rxe);
	struct device *(*dma_device)(struct rxe_dev *rxe);
	int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
	int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
	int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		       struct sk_buff *skb, u32 *crc);
	int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		    struct sk_buff *skb);
	int (*loopback)(struct sk_buff *skb);
	struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
				       int paylen, struct rxe_pkt_info *pkt);
	char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
	enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
					   unsigned int port_num);
};

/* Per-device state: the embedded ib_device plus pools for every object
 * type and the single port this driver exposes.
 */
struct rxe_dev {
	struct ib_device	ib_dev;
	struct ib_device_attr	attr;
	int			max_ucontext;
	int			max_inline_data;
	struct kref		ref_cnt;
	struct mutex	usdev_lock;

	struct rxe_ifc_ops	*ifc_ops;

	struct net_device	*ndev;

	int			xmit_errors;

	struct rxe_pool		uc_pool;
	struct rxe_pool		pd_pool;
	struct rxe_pool		ah_pool;
	struct rxe_pool		srq_pool;
	struct rxe_pool		qp_pool;
	struct rxe_pool		cq_pool;
	struct rxe_pool		mr_pool;
	struct rxe_pool		mw_pool;
	struct rxe_pool		mc_grp_pool;
	struct rxe_pool		mc_elem_pool;

	spinlock_t		pending_lock; /* guard pending_mmaps */
	struct list_head	pending_mmaps;

	spinlock_t		mmap_offset_lock; /* guard mmap_offset */
	int			mmap_offset;

	struct rxe_port		port;
	struct list_head	list;
};

/* NULL-safe container_of helpers from ib_* objects to rxe_* objects. */
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
{
	return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
}

static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
{
	return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;
}

static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
{
	return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
}

static inline struct rxe_ah *to_rah(struct ib_ah *ah)
{
	return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
}

static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
{
	return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;
}

static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
{
	return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;
}

static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
{
	return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;
}

static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
{
	return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;
}

static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
{
	return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
}

int rxe_register_device(struct rxe_dev *rxe);
int rxe_unregister_device(struct rxe_dev *rxe);

void rxe_mc_cleanup(void *arg);

#endif /* RXE_VERBS_H */
+1 -2
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 1967 1967 priv->hca_caps = hca->attrs.device_cap_flags; 1968 1968 1969 1969 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 1970 - priv->dev->hw_features = NETIF_F_SG | 1971 - NETIF_F_IP_CSUM | NETIF_F_RXCSUM; 1970 + priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; 1972 1971 1973 1972 if (priv->hca_caps & IB_DEVICE_UD_TSO) 1974 1973 priv->dev->hw_features |= NETIF_F_TSO;
+5 -5
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
··· 135 135 .cap = { 136 136 .max_send_wr = ipoib_sendq_size, 137 137 .max_recv_wr = ipoib_recvq_size, 138 - .max_send_sge = 1, 138 + .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, 139 + MAX_SKB_FRAGS + 1), 139 140 .max_recv_sge = IPOIB_UD_RX_SG 140 141 }, 141 142 .sq_sig_type = IB_SIGNAL_ALL_WR, ··· 206 205 if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) 207 206 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; 208 207 209 - if (dev->features & NETIF_F_SG) 210 - init_attr.cap.max_send_sge = 211 - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); 212 - 213 208 priv->qp = ib_create_qp(priv->pd, &init_attr); 214 209 if (IS_ERR(priv->qp)) { 215 210 printk(KERN_WARNING "%s: failed to create QP\n", ca->name); ··· 230 233 231 234 priv->rx_wr.next = NULL; 232 235 priv->rx_wr.sg_list = priv->rx_sge; 236 + 237 + if (init_attr.cap.max_send_sge > 1) 238 + dev->features |= NETIF_F_SG; 233 239 234 240 priv->max_send_sge = init_attr.cap.max_send_sge; 235 241
+40
drivers/net/ethernet/mellanox/mlx4/fw.c
··· 721 721 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 722 722 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 723 723 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c 724 + #define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c 724 725 #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d 725 726 #define QUERY_DEV_CAP_VXLAN 0x9e 726 727 #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 ··· 936 935 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; 937 936 if (field32 & (1 << 7)) 938 937 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; 938 + MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); 939 + if (field32 & (1 << 17)) 940 + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; 939 941 MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); 940 942 if (field & 1<<6) 941 943 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; ··· 2459 2455 return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, 2460 2456 MLX4_CMD_NATIVE); 2461 2457 } 2458 + 2459 + int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, 2460 + const u32 offset[], 2461 + u32 value[], size_t array_len, u8 port) 2462 + { 2463 + struct mlx4_cmd_mailbox *mailbox; 2464 + u32 *outbox; 2465 + size_t i; 2466 + int ret; 2467 + 2468 + mailbox = mlx4_alloc_cmd_mailbox(dev); 2469 + if (IS_ERR(mailbox)) 2470 + return PTR_ERR(mailbox); 2471 + 2472 + outbox = mailbox->buf; 2473 + 2474 + ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, 2475 + MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, 2476 + MLX4_CMD_NATIVE); 2477 + if (ret) 2478 + goto out; 2479 + 2480 + for (i = 0; i < array_len; i++) { 2481 + if (offset[i] > MLX4_MAILBOX_SIZE) { 2482 + ret = -EINVAL; 2483 + goto out; 2484 + } 2485 + 2486 + MLX4_GET(value[i], outbox, offset[i]); 2487 + } 2488 + 2489 + out: 2490 + mlx4_free_cmd_mailbox(dev, mailbox); 2491 + return ret; 2492 + } 2493 + EXPORT_SYMBOL(mlx4_query_diag_counters); 2462 2494 2463 2495 int mlx4_get_phys_port_id(struct mlx4_dev *dev) 2464 2496 {
+7
include/linux/mlx4/device.h
··· 220 220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, 221 221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, 222 222 MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, 223 + MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, 223 224 }; 224 225 225 226 enum { ··· 1341 1340 VXLAN_STEER_BY_INNER_VLAN = 1 << 4, 1342 1341 }; 1343 1342 1343 + enum { 1344 + MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2, 1345 + }; 1344 1346 1345 1347 int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, 1346 1348 enum mlx4_net_trans_promisc_mode mode); ··· 1384 1380 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); 1385 1381 int mlx4_SYNC_TPT(struct mlx4_dev *dev); 1386 1382 int mlx4_test_interrupts(struct mlx4_dev *dev); 1383 + int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, 1384 + const u32 offset[], u32 value[], 1385 + size_t array_len, u8 port); 1387 1386 u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); 1388 1387 bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); 1389 1388 struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);
+13
include/rdma/ib_sa.h
··· 94 94 IB_SA_BEST = 3 95 95 }; 96 96 97 + /* 98 + * There are 4 types of join states: 99 + * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. 100 + * The order corresponds to JoinState bits in MCMemberRecord. 101 + */ 102 + enum ib_sa_mc_join_states { 103 + FULLMEMBER_JOIN, 104 + NONMEMBER_JOIN, 105 + SENDONLY_NONMEBER_JOIN, 106 + SENDONLY_FULLMEMBER_JOIN, 107 + NUM_JOIN_MEMBERSHIP_TYPES, 108 + }; 109 + 97 110 #define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) 98 111 99 112 /*
+3 -1
include/rdma/rdma_cm.h
··· 333 333 * address. 334 334 * @id: Communication identifier associated with the request. 335 335 * @addr: Multicast address identifying the group to join. 336 + * @join_state: Multicast JoinState bitmap requested by port. 337 + * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits. 336 338 * @context: User-defined context associated with the join request, returned 337 339 * to the user through the private_data pointer in multicast events. 338 340 */ 339 341 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 340 - void *context); 342 + u8 join_state, void *context); 341 343 342 344 /** 343 345 * rdma_leave_multicast - Leave the multicast group specified by the given
+1
include/uapi/rdma/Kbuild
··· 6 6 header-y += rdma_netlink.h 7 7 header-y += rdma_user_cm.h 8 8 header-y += hfi/ 9 + header-y += rdma_user_rxe.h
+8 -1
include/uapi/rdma/rdma_user_cm.h
··· 244 244 __u32 id; 245 245 }; 246 246 247 + /* Multicast join flags */ 248 + enum { 249 + RDMA_MC_JOIN_FLAG_FULLMEMBER, 250 + RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, 251 + RDMA_MC_JOIN_FLAG_RESERVED, 252 + }; 253 + 247 254 struct rdma_ucm_join_mcast { 248 255 __u64 response; /* rdma_ucma_create_id_resp */ 249 256 __u64 uid; 250 257 __u32 id; 251 258 __u16 addr_size; 252 - __u16 reserved; 259 + __u16 join_flags; 253 260 struct sockaddr_storage addr; 254 261 }; 255 262
+144
include/uapi/rdma/rdma_user_rxe.h
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Userspace-visible ABI structures for the rxe driver (uapi header).
 *
 * NOTE(review): struct sockaddr / sockaddr_in / sockaddr_in6 are used
 * below but only <linux/types.h> is included; a pure-userspace build of
 * this header may need an additional include — confirm.
 */

#ifndef RDMA_USER_RXE_H
#define RDMA_USER_RXE_H

#include <linux/types.h>

union rxe_gid {
	__u8	raw[16];
	struct {
		__be64	subnet_prefix;
		__be64	interface_id;
	} global;
};

struct rxe_global_route {
	union rxe_gid	dgid;
	__u32		flow_label;
	__u8		sgid_index;
	__u8		hop_limit;
	__u8		traffic_class;
};

struct rxe_av {
	__u8			port_num;
	__u8			network_type;
	struct rxe_global_route	grh;
	union {
		struct sockaddr		_sockaddr;
		struct sockaddr_in	_sockaddr_in;
		struct sockaddr_in6	_sockaddr_in6;
	} sgid_addr, dgid_addr;
};

struct rxe_send_wr {
	__u64			wr_id;
	__u32			num_sge;
	__u32			opcode;
	__u32			send_flags;
	union {
		__be32		imm_data;
		__u32		invalidate_rkey;
	} ex;
	union {
		struct {
			__u64	remote_addr;
			__u32	rkey;
		} rdma;
		struct {
			__u64	remote_addr;
			__u64	compare_add;
			__u64	swap;
			__u32	rkey;
		} atomic;
		struct {
			__u32	remote_qpn;
			__u32	remote_qkey;
			__u16	pkey_index;
		} ud;
		struct {
			/* NOTE(review): a raw kernel pointer inside a uapi
			 * struct — its size differs between 32- and 64-bit
			 * userspace and it can leak a kernel address;
			 * confirm userspace never consumes this member.
			 */
			struct ib_mr *mr;
			__u32	     key;
			/* NOTE(review): non-fixed-width type in a uapi
			 * struct; __u32 would be conventional — confirm.
			 */
			int	     access;
		} reg;
	} wr;
};

struct rxe_sge {
	__u64	addr;
	__u32	length;
	__u32	lkey;
};

/* Describes one mmap-able region exchanged with userspace. */
struct mminfo {
	__u64	offset;
	__u32	size;
	__u32	pad;
};

struct rxe_dma_info {
	__u32			length;
	__u32			resid;
	__u32			cur_sge;
	__u32			num_sge;
	__u32			sge_offset;
	union {
		/* zero-length members: either inline payload or the SGE
		 * array follows this header in the same buffer
		 */
		__u8		inline_data[0];
		struct rxe_sge	sge[0];
	};
};

struct rxe_send_wqe {
	struct rxe_send_wr	wr;
	struct rxe_av		av;
	__u32			status;
	__u32			state;
	__u64			iova;
	__u32			mask;
	__u32			first_psn;
	__u32			last_psn;
	__u32			ack_length;
	__u32			ssn;
	__u32			has_rd_atomic;
	struct rxe_dma_info	dma;
};

struct rxe_recv_wqe {
	__u64			wr_id;
	__u32			num_sge;
	__u32			padding;
	struct rxe_dma_info	dma;
};

#endif /* RDMA_USER_RXE_H */