Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'mlx' into merge-test

+876 -337
-3
drivers/infiniband/core/core_priv.h
···
 void ib_cache_setup(void);
 void ib_cache_cleanup(void);

-int ib_resolve_eth_dmac(struct ib_qp *qp,
-                        struct ib_qp_attr *qp_attr, int *qp_attr_mask);
-
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
                                      struct net_device *idev, void *cookie);

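The prototype leaves the core-private header because this merge turns ib_resolve_eth_dmac() into an exported helper with a narrower signature (see the verbs.c hunk below). Sketch of the change, old form against the form used after this merge:

    /* before: resolved as part of modify-QP, core-private */
    int ib_resolve_eth_dmac(struct ib_qp *qp,
                            struct ib_qp_attr *qp_attr, int *qp_attr_mask);

    /* after: takes only the device and the address vector, exported so
     * drivers (e.g. mlx5 create_ah) can call it directly */
    int ib_resolve_eth_dmac(struct ib_device *device,
                            struct ib_ah_attr *ah_attr);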
+1
drivers/infiniband/core/uverbs.h
···
 IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
 IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
 IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(modify_qp);

 #endif /* UVERBS_H */
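IB_UVERBS_DECLARE_EX_CMD() expands to the prototype of an extended-command handler, so this one line declares (roughly) the function defined in uverbs_cmd.c below:

    int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
                               struct ib_device *ib_dev,
                               struct ib_udata *ucore,
                               struct ib_udata *uhw);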
+153 -72
drivers/infiniband/core/uverbs_cmd.c
···
     }
 }

-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
-                            struct ib_device *ib_dev,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int modify_qp(struct ib_uverbs_file *file,
+                     struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
 {
-    struct ib_uverbs_modify_qp cmd;
-    struct ib_udata udata;
-    struct ib_qp *qp;
-    struct ib_qp_attr *attr;
-    int ret;
-
-    if (copy_from_user(&cmd, buf, sizeof cmd))
-        return -EFAULT;
-
-    INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
-               out_len);
+    struct ib_qp_attr *attr;
+    struct ib_qp *qp;
+    int ret;

     attr = kmalloc(sizeof *attr, GFP_KERNEL);
     if (!attr)
         return -ENOMEM;

-    qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+    qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
     if (!qp) {
         ret = -EINVAL;
         goto out;
     }

-    attr->qp_state            = cmd.qp_state;
-    attr->cur_qp_state        = cmd.cur_qp_state;
-    attr->path_mtu            = cmd.path_mtu;
-    attr->path_mig_state      = cmd.path_mig_state;
-    attr->qkey                = cmd.qkey;
-    attr->rq_psn              = cmd.rq_psn;
-    attr->sq_psn              = cmd.sq_psn;
-    attr->dest_qp_num         = cmd.dest_qp_num;
-    attr->qp_access_flags     = cmd.qp_access_flags;
-    attr->pkey_index          = cmd.pkey_index;
-    attr->alt_pkey_index      = cmd.alt_pkey_index;
-    attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
-    attr->max_rd_atomic       = cmd.max_rd_atomic;
-    attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
-    attr->min_rnr_timer       = cmd.min_rnr_timer;
-    attr->port_num            = cmd.port_num;
-    attr->timeout             = cmd.timeout;
-    attr->retry_cnt           = cmd.retry_cnt;
-    attr->rnr_retry           = cmd.rnr_retry;
-    attr->alt_port_num        = cmd.alt_port_num;
-    attr->alt_timeout         = cmd.alt_timeout;
+    attr->qp_state            = cmd->base.qp_state;
+    attr->cur_qp_state        = cmd->base.cur_qp_state;
+    attr->path_mtu            = cmd->base.path_mtu;
+    attr->path_mig_state      = cmd->base.path_mig_state;
+    attr->qkey                = cmd->base.qkey;
+    attr->rq_psn              = cmd->base.rq_psn;
+    attr->sq_psn              = cmd->base.sq_psn;
+    attr->dest_qp_num         = cmd->base.dest_qp_num;
+    attr->qp_access_flags     = cmd->base.qp_access_flags;
+    attr->pkey_index          = cmd->base.pkey_index;
+    attr->alt_pkey_index      = cmd->base.alt_pkey_index;
+    attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+    attr->max_rd_atomic       = cmd->base.max_rd_atomic;
+    attr->max_dest_rd_atomic  = cmd->base.max_dest_rd_atomic;
+    attr->min_rnr_timer       = cmd->base.min_rnr_timer;
+    attr->port_num            = cmd->base.port_num;
+    attr->timeout             = cmd->base.timeout;
+    attr->retry_cnt           = cmd->base.retry_cnt;
+    attr->rnr_retry           = cmd->base.rnr_retry;
+    attr->alt_port_num        = cmd->base.alt_port_num;
+    attr->alt_timeout         = cmd->base.alt_timeout;
+    attr->rate_limit          = cmd->rate_limit;

-    memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
-    attr->ah_attr.grh.flow_label    = cmd.dest.flow_label;
-    attr->ah_attr.grh.sgid_index    = cmd.dest.sgid_index;
-    attr->ah_attr.grh.hop_limit     = cmd.dest.hop_limit;
-    attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
-    attr->ah_attr.dlid              = cmd.dest.dlid;
-    attr->ah_attr.sl                = cmd.dest.sl;
-    attr->ah_attr.src_path_bits     = cmd.dest.src_path_bits;
-    attr->ah_attr.static_rate       = cmd.dest.static_rate;
-    attr->ah_attr.ah_flags          = cmd.dest.is_global ? IB_AH_GRH : 0;
-    attr->ah_attr.port_num          = cmd.dest.port_num;
+    memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
+    attr->ah_attr.grh.flow_label    = cmd->base.dest.flow_label;
+    attr->ah_attr.grh.sgid_index    = cmd->base.dest.sgid_index;
+    attr->ah_attr.grh.hop_limit     = cmd->base.dest.hop_limit;
+    attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
+    attr->ah_attr.dlid              = cmd->base.dest.dlid;
+    attr->ah_attr.sl                = cmd->base.dest.sl;
+    attr->ah_attr.src_path_bits     = cmd->base.dest.src_path_bits;
+    attr->ah_attr.static_rate       = cmd->base.dest.static_rate;
+    attr->ah_attr.ah_flags          = cmd->base.dest.is_global ?
+                                      IB_AH_GRH : 0;
+    attr->ah_attr.port_num          = cmd->base.dest.port_num;

-    memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
-    attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
-    attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
-    attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
-    attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
-    attr->alt_ah_attr.dlid              = cmd.alt_dest.dlid;
-    attr->alt_ah_attr.sl                = cmd.alt_dest.sl;
-    attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
-    attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
-    attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
-    attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
+    memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
+    attr->alt_ah_attr.grh.flow_label    = cmd->base.alt_dest.flow_label;
+    attr->alt_ah_attr.grh.sgid_index    = cmd->base.alt_dest.sgid_index;
+    attr->alt_ah_attr.grh.hop_limit     = cmd->base.alt_dest.hop_limit;
+    attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
+    attr->alt_ah_attr.dlid              = cmd->base.alt_dest.dlid;
+    attr->alt_ah_attr.sl                = cmd->base.alt_dest.sl;
+    attr->alt_ah_attr.src_path_bits     = cmd->base.alt_dest.src_path_bits;
+    attr->alt_ah_attr.static_rate       = cmd->base.alt_dest.static_rate;
+    attr->alt_ah_attr.ah_flags          = cmd->base.alt_dest.is_global ?
+                                          IB_AH_GRH : 0;
+    attr->alt_ah_attr.port_num          = cmd->base.alt_dest.port_num;

     if (qp->real_qp == qp) {
-        ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
-        if (ret)
-            goto release_qp;
+        if (cmd->base.attr_mask & IB_QP_AV) {
+            ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+            if (ret)
+                goto release_qp;
+        }
         ret = qp->device->modify_qp(qp, attr,
-            modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+                                    modify_qp_mask(qp->qp_type,
+                                                   cmd->base.attr_mask),
+                                    udata);
     } else {
-        ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+        ret = ib_modify_qp(qp, attr,
+                           modify_qp_mask(qp->qp_type,
+                                          cmd->base.attr_mask));
     }
-
-    if (ret)
-        goto release_qp;
-
-    ret = in_len;

 release_qp:
     put_qp_read(qp);

 out:
     kfree(attr);
+
+    return ret;
+}
+
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+                            struct ib_device *ib_dev,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+    struct ib_uverbs_ex_modify_qp cmd = {};
+    struct ib_udata udata;
+    int ret;
+
+    if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
+        return -EFAULT;
+
+    if (cmd.base.attr_mask &
+        ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+        return -EOPNOTSUPP;
+
+    INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+               in_len - sizeof(cmd.base), out_len);
+
+    ret = modify_qp(file, &cmd, &udata);
+    if (ret)
+        return ret;
+
+    return in_len;
+}
+
+int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
+                           struct ib_device *ib_dev,
+                           struct ib_udata *ucore,
+                           struct ib_udata *uhw)
+{
+    struct ib_uverbs_ex_modify_qp cmd = {};
+    int ret;
+
+    /*
+     * Last bit is reserved for extending the attr_mask by
+     * using another field.
+     */
+    BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+    if (ucore->inlen < sizeof(cmd.base))
+        return -EINVAL;
+
+    ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+    if (ret)
+        return ret;
+
+    if (cmd.base.attr_mask &
+        ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+        return -EOPNOTSUPP;
+
+    if (ucore->inlen > sizeof(cmd)) {
+        if (ib_is_udata_cleared(ucore, sizeof(cmd),
+                                ucore->inlen - sizeof(cmd)))
+            return -EOPNOTSUPP;
+    }
+
+    ret = modify_qp(file, &cmd, uhw);

     return ret;
 }
···
     struct ib_ah *ah;
     struct ib_ah_attr attr;
     int ret;
+    struct ib_udata udata;

     if (out_len < sizeof resp)
         return -ENOSPC;

     if (copy_from_user(&cmd, buf, sizeof cmd))
         return -EFAULT;
+
+    INIT_UDATA(&udata, buf + sizeof(cmd),
+               (unsigned long)cmd.response + sizeof(resp),
+               in_len - sizeof(cmd), out_len - sizeof(resp));

     uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
     if (!uobj)
···
     memset(&attr.dmac, 0, sizeof(attr.dmac));
     memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);

-    ah = ib_create_ah(pd, &attr);
+    ah = pd->device->create_ah(pd, &attr, &udata);
+
     if (IS_ERR(ah)) {
         ret = PTR_ERR(ah);
         goto err_put;
     }

+    ah->device  = pd->device;
+    ah->pd      = pd;
+    atomic_inc(&pd->usecnt);
     ah->uobject  = uobj;
     uobj->object = ah;
···
     kern_spec_val = (void *)kern_spec +
         sizeof(struct ib_uverbs_flow_spec_hdr);
     kern_spec_mask = kern_spec_val + kern_filter_sz;
+    if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+        return -EINVAL;

-    switch (ib_spec->type) {
+    switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
     case IB_FLOW_SPEC_ETH:
         ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
         actual_filter_sz = spec_filter_size(kern_spec_mask,
···
         ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
         memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
         memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
+        break;
+    case IB_FLOW_SPEC_VXLAN_TUNNEL:
+        ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+        actual_filter_sz = spec_filter_size(kern_spec_mask,
+                                            kern_filter_sz,
+                                            ib_filter_sz);
+        if (actual_filter_sz <= 0)
+            return -EINVAL;
+        ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+        memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+        memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+        if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+            (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+            return -EINVAL;
         break;
     default:
         return -EINVAL;
···
         err = PTR_ERR(flow_id);
         goto err_free;
     }
-    flow_id->qp = qp;
     flow_id->uobject = uobj;
     uobj->object = flow_id;

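The two ~((LAST << 1) - 1) tests above are how the legacy and extended paths bound the accepted attr_mask: if the highest attribute a path understands is bit n, then (bit << 1) - 1 is a mask with bits 0..n set, and any caller bit outside it trips -EOPNOTSUPP. A worked example with an illustrative value:

    /* Suppose the last supported attribute were bit 25 (illustrative). */
    u32 last      = 1u << 25;
    u32 supported = (last << 1) - 1;     /* bits 0..25 set: 0x03ffffff */

    if (attr_mask & ~supported)          /* any of bits 26..31 set? */
            return -EOPNOTSUPP;

The BUILD_BUG_ON documents why bit 31 must stay free: with a last-bit of (1 << 31), the (mask << 1) in this computation would overflow the 32-bit word.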
+1
drivers/infiniband/core/uverbs_main.c
···
     [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
     [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
     [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
+    [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
 };

 static void ib_uverbs_add_one(struct ib_device *device);
+56 -50
drivers/infiniband/core/verbs.c
···
 {
     struct ib_ah *ah;

-    ah = pd->device->create_ah(pd, ah_attr);
+    ah = pd->device->create_ah(pd, ah_attr, NULL);

     if (!IS_ERR(ah)) {
         ah->device = pd->device;
···
 }
 EXPORT_SYMBOL(ib_create_ah);

-static int ib_get_header_version(const union rdma_network_hdr *hdr)
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
 {
     const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
     struct iphdr ip4h_checked;
···
         return 4;
     return 6;
 }
+EXPORT_SYMBOL(ib_get_rdma_header_version);

 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
                                                      u8 port_num,
···
     if (rdma_protocol_ib(device, port_num))
         return RDMA_NETWORK_IB;

-    grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+    grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);

     if (grh_version == 4)
         return RDMA_NETWORK_IPV4;
···
                            &context, gid_index);
 }

-static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
-                                  enum rdma_network_type net_type,
-                                  union ib_gid *sgid, union ib_gid *dgid)
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+                              enum rdma_network_type net_type,
+                              union ib_gid *sgid, union ib_gid *dgid)
 {
     struct sockaddr_in src_in;
     struct sockaddr_in dst_in;
···
             return -EINVAL;
         }
     }
+EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);

 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                        const struct ib_wc *wc, const struct ib_grh *grh,
···
         net_type = ib_get_net_type_by_grh(device, port_num, grh);
         gid_type = ib_network_to_gid_type(net_type);
     }
-    ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
-                                 &sgid, &dgid);
+    ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+                                    &sgid, &dgid);
     if (ret)
         return ret;
···
                              IB_QP_QKEY),
             [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                             IB_QP_QKEY),
+            [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
         }
     }
 },
···
                              IB_QP_QKEY),
             [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                             IB_QP_QKEY),
+            [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
         }
     },
     [IB_QPS_SQD] = {
···
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);

-int ib_resolve_eth_dmac(struct ib_qp *qp,
-                        struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_device *device,
+                        struct ib_ah_attr *ah_attr)
 {
     int ret = 0;

-    if (*qp_attr_mask & IB_QP_AV) {
-        if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
-            qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
-            return -EINVAL;
+    if (ah_attr->port_num < rdma_start_port(device) ||
+        ah_attr->port_num > rdma_end_port(device))
+        return -EINVAL;

-        if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
-            return 0;
+    if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+        return 0;

-        if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
-            rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
-                            qp_attr->ah_attr.dmac);
-        } else {
-            union ib_gid sgid;
-            struct ib_gid_attr sgid_attr;
-            int ifindex;
-            int hop_limit;
+    if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+        rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
+                        ah_attr->dmac);
+    } else {
+        union ib_gid sgid;
+        struct ib_gid_attr sgid_attr;
+        int ifindex;
+        int hop_limit;

-            ret = ib_query_gid(qp->device,
-                               qp_attr->ah_attr.port_num,
-                               qp_attr->ah_attr.grh.sgid_index,
-                               &sgid, &sgid_attr);
+        ret = ib_query_gid(device,
+                           ah_attr->port_num,
+                           ah_attr->grh.sgid_index,
+                           &sgid, &sgid_attr);

-            if (ret || !sgid_attr.ndev) {
-                if (!ret)
-                    ret = -ENXIO;
-                goto out;
-            }
-
-            ifindex = sgid_attr.ndev->ifindex;
-
-            ret = rdma_addr_find_l2_eth_by_grh(&sgid,
-                                               &qp_attr->ah_attr.grh.dgid,
-                                               qp_attr->ah_attr.dmac,
-                                               NULL, &ifindex, &hop_limit);
-
-            dev_put(sgid_attr.ndev);
-
-            qp_attr->ah_attr.grh.hop_limit = hop_limit;
+        if (ret || !sgid_attr.ndev) {
+            if (!ret)
+                ret = -ENXIO;
+            goto out;
         }
+
+        ifindex = sgid_attr.ndev->ifindex;
+
+        ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+                                           &ah_attr->grh.dgid,
+                                           ah_attr->dmac,
+                                           NULL, &ifindex, &hop_limit);
+
+        dev_put(sgid_attr.ndev);
+
+        ah_attr->grh.hop_limit = hop_limit;
     }
 out:
     return ret;
 }
 EXPORT_SYMBOL(ib_resolve_eth_dmac);

-
 int ib_modify_qp(struct ib_qp *qp,
                  struct ib_qp_attr *qp_attr,
                  int qp_attr_mask)
 {
-    int ret;

-    ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
-    if (ret)
-        return ret;
+    if (qp_attr_mask & IB_QP_AV) {
+        int ret;
+
+        ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+        if (ret)
+            return ret;
+    }

     return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
···
         return ERR_PTR(-ENOSYS);

     flow_id = qp->device->create_flow(qp, flow_attr, domain);
-    if (!IS_ERR(flow_id))
+    if (!IS_ERR(flow_id)) {
         atomic_inc(&qp->usecnt);
+        flow_id->qp = qp;
+    }
     return flow_id;
 }
 EXPORT_SYMBOL(ib_create_flow);
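With IB_QP_RATE_LIMIT now legal for raw packet QPs in the RTR->RTS and RTS->RTS tables above, a kernel caller could adjust pacing roughly like this (hedged sketch; the rate's units are whatever the device advertises, Kbps per the mlx5 packet_pacing caps elsewhere in this merge):

    struct ib_qp_attr attr = {
            .qp_state   = IB_QPS_RTS,
            .rate_limit = 100000,           /* illustrative value */
    };
    int err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_RATE_LIMIT);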
+2 -1
drivers/infiniband/hw/cxgb3/iwch_provider.c
···
 #include "common.h"

 static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
-                                    struct ib_ah_attr *ah_attr)
+                                    struct ib_ah_attr *ah_attr,
+                                    struct ib_udata *udata)
 {
     return ERR_PTR(-ENOSYS);
 }
+3 -1
drivers/infiniband/hw/cxgb4/provider.c
···
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");

 static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
-                                    struct ib_ah_attr *ah_attr)
+                                    struct ib_ah_attr *ah_attr,
+                                    struct ib_udata *udata)
+
 {
     return ERR_PTR(-ENOSYS);
 }
+2 -1
drivers/infiniband/hw/hns/hns_roce_ah.c
···
 #define HNS_ROCE_VLAN_SL_BIT_MASK 7
 #define HNS_ROCE_VLAN_SL_SHIFT 13

-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr)
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+                                 struct ib_udata *udata)
 {
     struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
     struct device *dev = &hr_dev->pdev->dev;
+2 -1
drivers/infiniband/hw/hns/hns_roce_device.h
···
                           unsigned long obj, int cnt,
                           int rr);

-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                 struct ib_udata *udata);
 int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int hns_roce_destroy_ah(struct ib_ah *ah);

+3 -1
drivers/infiniband/hw/i40iw/i40iw_verbs.c
···
  * @ah_attr: address handle attributes
  */
 static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
-                                     struct ib_ah_attr *attr)
+                                     struct ib_ah_attr *attr,
+                                     struct ib_udata *udata)
+
 {
     return ERR_PTR(-ENOSYS);
 }
+7 -3
drivers/infiniband/hw/mlx4/ah.c
···
         !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
         --ah->av.eth.stat_rate;
     }
-
+    ah->av.eth.sl_tclass_flowlabel |=
+        cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+                    ah_attr->grh.flow_label);
     /*
      * HW requires multicast LID so we just choose one.
      */
···
         ah->av.ib.dlid = cpu_to_be16(0xc000);

     memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
-    ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
+    ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);

     return &ah->ibah;
 }

-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                struct ib_udata *udata)
+
 {
     struct mlx4_ib_ah *ah;
     struct ib_ah *ret;
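sl_tclass_flowlabel is now assembled in two OR steps precisely because it packs three fields into one big-endian word: sl in bits 31..29, traffic class in bits 27..20, flow label in bits 19..0. Worked example:

    /* illustrative packing, matching the two |= sites above */
    u32 w = (3u << 29)          /* sl = 3            */
          | (0x80u << 20)       /* traffic class 0x80 */
          | 0x12345;            /* flow label         */
    /* w == 0x68012345; stored on the wire via cpu_to_be32() */

The second assignment must OR into the field rather than overwrite it, or the traffic class and flow label written earlier would be lost.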
+42 -7
drivers/infiniband/hw/mlx4/mad.c
···
 #include <linux/mlx4/cmd.h>
 #include <linux/gfp.h>
 #include <rdma/ib_pma.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>

 #include <linux/mlx4/driver.h>
 #include "mlx4_ib.h"
···
     return -EINVAL;
 }

+static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
+                                union ib_gid *dgid)
+{
+    int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
+    enum rdma_network_type net_type;
+
+    if (version == 4)
+        net_type = RDMA_NETWORK_IPV4;
+    else if (version == 6)
+        net_type = RDMA_NETWORK_IPV6;
+    else
+        return -EINVAL;
+
+    return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+                                     sgid, dgid);
+}
+
 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
                           enum ib_qp_type dest_qpt, struct ib_wc *wc,
                           struct ib_grh *grh, struct ib_mad *mad)
···
     memset(&attr, 0, sizeof attr);
     attr.port_num = port;
     if (is_eth) {
-        memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+        union ib_gid sgid;
+
+        if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+            return -EINVAL;
         attr.ah_flags = IB_AH_GRH;
     }
     ah = ib_create_ah(tun_ctx->pd, &attr);
···
         is_eth = 1;

     if (is_eth) {
+        union ib_gid dgid;
+        union ib_gid sgid;
+
+        if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
+            return -EINVAL;
         if (!(wc->wc_flags & IB_WC_GRH)) {
             mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
             return -EINVAL;
···
             mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
             return -EINVAL;
         }
-        err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
+        err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
         if (err && mlx4_is_mf_bonded(dev->dev)) {
             other_port = (port == 1) ? 2 : 1;
-            err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
+            err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
             if (!err) {
                 port = other_port;
                 pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
···

     /* If a grh is present, we demux according to it */
     if (wc->wc_flags & IB_WC_GRH) {
-        slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
-        if (slave < 0) {
-            mlx4_ib_warn(ibdev, "failed matching grh\n");
-            return -ENOENT;
+        if (grh->dgid.global.interface_id ==
+                cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
+            grh->dgid.global.subnet_prefix == cpu_to_be64(
+                atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
+            slave = 0;
+        } else {
+            slave = mlx4_ib_find_real_gid(ibdev, port,
+                                          grh->dgid.global.interface_id);
+            if (slave < 0) {
+                mlx4_ib_warn(ibdev, "failed matching grh\n");
+                return -ENOENT;
+            }
         }
     }
     /* Class-specific handling */
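get_gids_from_l3_hdr() exists because on RoCEv2 the buffer presented as a "GRH" actually holds an IPv4 or IPv6 header, so the GIDs must be derived rather than copied verbatim. A simplified sketch of the version probe the shared helper performs (the in-tree ib_get_rdma_header_version() also validates the IPv4 ihl and header checksum before trusting the version nibble):

    static int hdr_version(const union rdma_network_hdr *hdr)
    {
            const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;

            return ip4h->version == 4 ? 4 : 6;   /* simplified */
    }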
+19 -11
drivers/infiniband/hw/mlx4/main.c
···
     props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
     props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
     props->timestamp_mask = 0xFFFFFFFFFFFFULL;
+    props->max_ah = INT_MAX;

     if (!mlx4_is_slave(dev->dev))
         err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
···
     if (err)
         goto out;

-    props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
-                          IB_WIDTH_4X : IB_WIDTH_1X;
-    props->active_speed = IB_SPEED_QDR;
+    props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ||
+                          (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+                          IB_WIDTH_4X : IB_WIDTH_1X;
+    props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+                          IB_SPEED_FDR : IB_SPEED_QDR;
     props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
     props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
     props->max_msg_sz = mdev->dev->caps.max_msg_sz;
···
     if (!ibdev->ib_uc_qpns_bitmap)
         goto err_steer_qp_release;

-    bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
-
-    err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
-        dev, ibdev->steer_qpn_base,
-        ibdev->steer_qpn_base +
-        ibdev->steer_qpn_count - 1);
-    if (err)
-        goto err_steer_free_bitmap;
+    if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
+        bitmap_zero(ibdev->ib_uc_qpns_bitmap,
+                    ibdev->steer_qpn_count);
+        err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+            dev, ibdev->steer_qpn_base,
+            ibdev->steer_qpn_base +
+            ibdev->steer_qpn_count - 1);
+        if (err)
+            goto err_steer_free_bitmap;
+    } else {
+        bitmap_fill(ibdev->ib_uc_qpns_bitmap,
+                    ibdev->steer_qpn_count);
+    }
 }

 for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
+2 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
···
 void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
 void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);

-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                struct ib_udata *udata);
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx4_ib_destroy_ah(struct ib_ah *ah);

+8 -5
drivers/infiniband/hw/mlx4/qp.c
···
     int qpn;
     int err;
     struct ib_qp_cap backup_cap;
-    struct mlx4_ib_sqp *sqp;
+    struct mlx4_ib_sqp *sqp = NULL;
     struct mlx4_ib_qp *qp;
     enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
     struct mlx4_ib_cq *mcq;
···
     mlx4_db_free(dev->dev, &qp->db);

 err:
-    if (!*caller_qp)
+    if (sqp)
+        kfree(sqp);
+    else if (!*caller_qp)
         kfree(qp);
     return err;
 }
···
     if (is_qp0(dev, mqp))
         mlx4_CLOSE_PORT(dev->dev, mqp->port);

-    if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+    if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI &&
+        dev->qp1_proxy[mqp->port - 1] == mqp) {
         mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
         dev->qp1_proxy[mqp->port - 1] = NULL;
         mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
···
     u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
         attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
     union ib_gid gid;
-    struct ib_gid_attr gid_attr;
+    struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
     u16 vlan = 0xffff;
     u8 smac[ETH_ALEN];
     int status = 0;
     int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
         attr->ah_attr.ah_flags & IB_AH_GRH;

-    if (is_eth) {
+    if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
         int index = attr->ah_attr.grh.sgid_index;

         status = ib_get_cached_gid(ibqp->device, port_num,
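The error-path change matters because, for special QPs, this function allocates a struct mlx4_ib_sqp and points qp at its embedded member, so qp is not itself a kmalloc'd pointer. Sketch of the relationship assumed here (mirroring the allocation earlier in create_qp_common):

    sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
    qp  = &sqp->qp;          /* qp aliases the embedded member */
    ...
err:
    if (sqp)
            kfree(sqp);      /* frees the whole container */
    else if (!*caller_qp)
            kfree(qp);

Freeing qp directly in the sqp case would be an invalid free of an interior pointer.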
+24 -1
drivers/infiniband/hw/mlx5/ah.c
···
     return &ah->ibah;
 }

-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                struct ib_udata *udata)
+
 {
     struct mlx5_ib_ah *ah;
     struct mlx5_ib_dev *dev = to_mdev(pd->device);
···

     if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
         return ERR_PTR(-EINVAL);
+
+    if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+        int err;
+        struct mlx5_ib_create_ah_resp resp = {};
+        u32 min_resp_len = offsetof(typeof(resp), dmac) +
+                           sizeof(resp.dmac);
+
+        if (udata->outlen < min_resp_len)
+            return ERR_PTR(-EINVAL);
+
+        resp.response_length = min_resp_len;
+
+        err = ib_resolve_eth_dmac(pd->device, ah_attr);
+        if (err)
+            return ERR_PTR(err);
+
+        memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+        err = ib_copy_to_udata(udata, &resp, resp.response_length);
+        if (err)
+            return ERR_PTR(err);
+    }

     ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
     if (!ah)
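The min_resp_len computation guards against userspace that predates the new response field: the reply buffer must at least reach the end of dmac[]. Hedged sketch of the uapi layout this implies (only the fields referenced above are certain; anything after dmac is an assumption):

    struct mlx5_ib_create_ah_resp {
            __u32 response_length;
            __u8  dmac[ETH_ALEN];
            /* possibly reserved/padding bytes after this */
    };

    u32 min_resp_len = offsetof(struct mlx5_ib_create_ah_resp, dmac) +
                       ETH_ALEN;   /* smallest acceptable udata->outlen */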
+31 -3
drivers/infiniband/hw/mlx5/cq.c
···
                          int entries, u32 **cqb,
                          int *cqe_size, int *index, int *inlen)
 {
-    struct mlx5_ib_create_cq ucmd;
+    struct mlx5_ib_create_cq ucmd = {};
     size_t ucmdlen;
     int page_shift;
     __be64 *pas;
···
     if (err)
         goto err_umem;

-    mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+    mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
                        &ncont, NULL);
     mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
                 ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
···

     *index = to_mucontext(context)->uuari.uars[0].index;

+    if (ucmd.cqe_comp_en == 1) {
+        if (unlikely((*cqe_size != 64) ||
+                     !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+            err = -EOPNOTSUPP;
+            mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
+                         *cqe_size);
+            goto err_cqb;
+        }
+
+        if (unlikely(!ucmd.cqe_comp_res_format ||
+                     !(ucmd.cqe_comp_res_format <
+                       MLX5_IB_CQE_RES_RESERVED) ||
+                     (ucmd.cqe_comp_res_format &
+                      (ucmd.cqe_comp_res_format - 1)))) {
+            err = -EOPNOTSUPP;
+            mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
+                         ucmd.cqe_comp_res_format);
+            goto err_cqb;
+        }
+
+        MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+        MLX5_SET(cqc, cqc, mini_cqe_res_format,
+                 ilog2(ucmd.cqe_comp_res_format));
+    }
+
     return 0;
+
+err_cqb:
+    kfree(cqb);

 err_db:
     mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
···
         return err;
     }

-    mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+    mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
                        npas, NULL);

     cq->resize_umem = umem;
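The cqe_comp_res_format validation accepts exactly one known bit: the value must be non-zero, below MLX5_IB_CQE_RES_RESERVED, and a power of two (x & (x - 1) is non-zero for any value with more than one bit set). ilog2() then converts the single bit into the index the cqc field expects. Assuming HASH is 1 << 0 and CSUM is 1 << 1:

    /* fmt = 0x1:  0x1 & 0x0 == 0  -> valid, ilog2(0x1) == 0 (hash)     */
    /* fmt = 0x2:  0x2 & 0x1 == 0  -> valid, ilog2(0x2) == 1 (checksum) */
    /* fmt = 0x3:  0x3 & 0x2 != 0  -> rejected: two formats requested   */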
+182 -90
drivers/infiniband/hw/mlx5/main.c
···

     if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
         && ibdev->ib_active) {
-        struct ib_event ibev = {0};
+        struct ib_event ibev = { };

         ibev.device = &ibdev->ib_dev;
         ibev.event = (event == NETDEV_UP) ?
···
     struct mlx5_ib_dev *dev = to_mdev(ibdev);
     struct mlx5_core_dev *mdev = dev->mdev;
     int err = -ENOMEM;
+    int max_sq_desc;
     int max_rq_sg;
     int max_sq_sg;
     u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
···
     props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
     max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
                 sizeof(struct mlx5_wqe_data_seg);
-    max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
-                 sizeof(struct mlx5_wqe_ctrl_seg)) /
-                sizeof(struct mlx5_wqe_data_seg);
+    max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
+    max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
+                 sizeof(struct mlx5_wqe_raddr_seg)) /
+                sizeof(struct mlx5_wqe_data_seg);
     props->max_sge = min(max_rq_sg, max_sq_sg);
     props->max_sge_rd = MLX5_MAX_SGE_RD;
     props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
···
     props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                        props->max_mcast_grp;
     props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+    props->max_ah = INT_MAX;
     props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
     props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;

···
         props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
         props->max_wq_type_rq =
             1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
+    }
+
+    if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+                    uhw->outlen)) {
+        resp.mlx5_ib_support_multi_pkt_send_wqes =
+            MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+        resp.response_length +=
+            sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+    }
+
+    if (field_avail(typeof(resp), reserved, uhw->outlen))
+        resp.response_length += sizeof(resp.reserved);
+
+    if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+        resp.cqe_comp_caps.max_num =
+            MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
+            MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
+        resp.cqe_comp_caps.supported_format =
+            MLX5_IB_CQE_RES_FORMAT_HASH |
+            MLX5_IB_CQE_RES_FORMAT_CSUM;
+        resp.response_length += sizeof(resp.cqe_comp_caps);
+    }
+
+    if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+        if (MLX5_CAP_QOS(mdev, packet_pacing) &&
+            MLX5_CAP_GEN(mdev, qos)) {
+            resp.packet_pacing_caps.qp_rate_limit_max =
+                MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
+            resp.packet_pacing_caps.qp_rate_limit_min =
+                MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
+            resp.packet_pacing_caps.supported_qpts |=
+                1 << IB_QPT_RAW_PACKET;
+        }
+        resp.response_length += sizeof(resp.packet_pacing_caps);
     }

     if (uhw->outlen) {
···
         resp.response_length += sizeof(resp.cqe_version);

     if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
-        resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+        resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+                              MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
         resp.response_length += sizeof(resp.cmds_supp_uhw);
     }

···
     MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }

+static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+                           bool inner)
+{
+    if (inner) {
+        MLX5_SET(fte_match_set_misc,
+                 misc_c, inner_ipv6_flow_label, mask);
+        MLX5_SET(fte_match_set_misc,
+                 misc_v, inner_ipv6_flow_label, val);
+    } else {
+        MLX5_SET(fte_match_set_misc,
+                 misc_c, outer_ipv6_flow_label, mask);
+        MLX5_SET(fte_match_set_misc,
+                 misc_v, outer_ipv6_flow_label, val);
+    }
+}
+
 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 {
     MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
···
 #define LAST_IPV4_FIELD tos
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id

 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
···
 static int parse_flow_attr(u32 *match_c, u32 *match_v,
                            const union ib_flow_spec *ib_spec)
 {
-    void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
-                                         outer_headers);
-    void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
-                                         outer_headers);
     void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                        misc_parameters);
     void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                        misc_parameters);
+    void *headers_c;
+    void *headers_v;

-    switch (ib_spec->type) {
+    if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+        headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                 inner_headers);
+        headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                 inner_headers);
+    } else {
+        headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                 outer_headers);
+        headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                 outer_headers);
+    }
+
+    switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
     case IB_FLOW_SPEC_ETH:
         if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
             return -ENOTSUPP;

-        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                      dmac_47_16),
                         ib_spec->eth.mask.dst_mac);
-        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                      dmac_47_16),
                         ib_spec->eth.val.dst_mac);

-        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                      smac_47_16),
                         ib_spec->eth.mask.src_mac);
-        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                      smac_47_16),
                         ib_spec->eth.val.src_mac);

         if (ib_spec->eth.mask.vlan_tag) {
-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                      vlan_tag, 1);
-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                      vlan_tag, 1);

-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                      first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                      first_vid, ntohs(ib_spec->eth.val.vlan_tag));

-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                      first_cfi,
                      ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                      first_cfi,
                      ntohs(ib_spec->eth.val.vlan_tag) >> 12);

-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                      first_prio,
                      ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
-            MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                      first_prio,
                      ntohs(ib_spec->eth.val.vlan_tag) >> 13);
         }
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                  ethertype, ntohs(ib_spec->eth.mask.ether_type));
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                  ethertype, ntohs(ib_spec->eth.val.ether_type));
         break;
     case IB_FLOW_SPEC_IPV4:
         if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
             return -ENOTSUPP;

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                  ethertype, 0xffff);
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                  ethertype, ETH_P_IP);

-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                             src_ipv4_src_ipv6.ipv4_layout.ipv4),
                &ib_spec->ipv4.mask.src_ip,
                sizeof(ib_spec->ipv4.mask.src_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                             src_ipv4_src_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.val.src_ip,
               sizeof(ib_spec->ipv4.val.src_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                             dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.mask.dst_ip,
               sizeof(ib_spec->ipv4.mask.dst_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                             dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.val.dst_ip,
               sizeof(ib_spec->ipv4.val.dst_ip));

-        set_tos(outer_headers_c, outer_headers_v,
+        set_tos(headers_c, headers_v,
                 ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

-        set_proto(outer_headers_c, outer_headers_v,
+        set_proto(headers_c, headers_v,
                   ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
         break;
     case IB_FLOW_SPEC_IPV6:
         if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
             return -ENOTSUPP;

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                  ethertype, 0xffff);
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                  ethertype, ETH_P_IPV6);

-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
                &ib_spec->ipv6.mask.src_ip,
                sizeof(ib_spec->ipv6.mask.src_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
                &ib_spec->ipv6.val.src_ip,
                sizeof(ib_spec->ipv6.val.src_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.mask.dst_ip,
               sizeof(ib_spec->ipv6.mask.dst_ip));
-        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.val.dst_ip,
               sizeof(ib_spec->ipv6.val.dst_ip));

-        set_tos(outer_headers_c, outer_headers_v,
+        set_tos(headers_c, headers_v,
                 ib_spec->ipv6.mask.traffic_class,
                 ib_spec->ipv6.val.traffic_class);

-        set_proto(outer_headers_c, outer_headers_v,
+        set_proto(headers_c, headers_v,
                   ib_spec->ipv6.mask.next_hdr,
                   ib_spec->ipv6.val.next_hdr);

-        MLX5_SET(fte_match_set_misc, misc_params_c,
-                 outer_ipv6_flow_label,
-                 ntohl(ib_spec->ipv6.mask.flow_label));
-        MLX5_SET(fte_match_set_misc, misc_params_v,
-                 outer_ipv6_flow_label,
-                 ntohl(ib_spec->ipv6.val.flow_label));
+        set_flow_label(misc_params_c, misc_params_v,
+                       ntohl(ib_spec->ipv6.mask.flow_label),
+                       ntohl(ib_spec->ipv6.val.flow_label),
+                       ib_spec->type & IB_FLOW_SPEC_INNER);
+
         break;
     case IB_FLOW_SPEC_TCP:
         if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                  LAST_TCP_UDP_FIELD))
             return -ENOTSUPP;

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                  0xff);
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                  IPPROTO_TCP);

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
                  ntohs(ib_spec->tcp_udp.mask.src_port));
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
                  ntohs(ib_spec->tcp_udp.val.src_port));

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
                  ntohs(ib_spec->tcp_udp.mask.dst_port));
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
                  ntohs(ib_spec->tcp_udp.val.dst_port));
         break;
     case IB_FLOW_SPEC_UDP:
···
         if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                  LAST_TCP_UDP_FIELD))
             return -ENOTSUPP;

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                  0xff);
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                  IPPROTO_UDP);

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
                  ntohs(ib_spec->tcp_udp.mask.src_port));
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
                  ntohs(ib_spec->tcp_udp.val.src_port));

-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
                  ntohs(ib_spec->tcp_udp.mask.dst_port));
-        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+        MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
                  ntohs(ib_spec->tcp_udp.val.dst_port));
+        break;
+    case IB_FLOW_SPEC_VXLAN_TUNNEL:
+        if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+                                 LAST_TUNNEL_FIELD))
+            return -ENOTSUPP;
+
+        MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+                 ntohl(ib_spec->tunnel.mask.tunnel_id));
+        MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+                 ntohl(ib_spec->tunnel.val.tunnel_id));
         break;
     default:
         return -EINVAL;
···
                                struct ib_port_immutable *immutable)
 {
     struct ib_port_attr attr;
+    struct mlx5_ib_dev *dev = to_mdev(ibdev);
+    enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
     int err;

     err = mlx5_ib_query_port(ibdev, port_num, &attr);
···
     immutable->pkey_tbl_len = attr.pkey_tbl_len;
     immutable->gid_tbl_len = attr.gid_tbl_len;
     immutable->core_cap_flags = get_core_cap_flags(ibdev);
-    immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+    if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
+        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

     return 0;
 }
···
             fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 }

-static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 {
     struct mlx5_core_dev *mdev = dev->mdev;
     struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
···
     return err;
 }

-static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
     struct mlx5_core_dev *mdev = dev->mdev;

···
     }
 }

-static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
-{
-    if (dev->roce.nb.notifier_call) {
-        unregister_netdevice_notifier(&dev->roce.nb);
-        dev->roce.nb.notifier_call = NULL;
-    }
-}
-
-static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
 {
     int err;

···
         return err;
     }

-    err = mlx5_nic_vport_enable_roce(dev->mdev);
-    if (err)
-        goto err_unregister_netdevice_notifier;
+    return 0;
+}

-    err = mlx5_roce_lag_init(dev);
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
+{
+    if (dev->roce.nb.notifier_call) {
+        unregister_netdevice_notifier(&dev->roce.nb);
+        dev->roce.nb.notifier_call = NULL;
+    }
+}
+
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
+{
+    int err;
+
+    err = mlx5_add_netdev_notifier(dev);
+    if (err)
+        return err;
+
+    if (MLX5_CAP_GEN(dev->mdev, roce)) {
+        err = mlx5_nic_vport_enable_roce(dev->mdev);
+        if (err)
+            goto err_unregister_netdevice_notifier;
+    }
+
+    err = mlx5_eth_lag_init(dev);
     if (err)
         goto err_disable_roce;

     return 0;

 err_disable_roce:
-    mlx5_nic_vport_disable_roce(dev->mdev);
+    if (MLX5_CAP_GEN(dev->mdev, roce))
+        mlx5_nic_vport_disable_roce(dev->mdev);

 err_unregister_netdevice_notifier:
-    mlx5_remove_roce_notifier(dev);
+    mlx5_remove_netdev_notifier(dev);
     return err;
 }

-static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 {
-    mlx5_roce_lag_cleanup(dev);
-    mlx5_nic_vport_disable_roce(dev->mdev);
+    mlx5_eth_lag_cleanup(dev);
+    if (MLX5_CAP_GEN(dev->mdev, roce))
+        mlx5_nic_vport_disable_roce(dev->mdev);
 }

 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
···
     port_type_cap = MLX5_CAP_GEN(mdev, port_type);
     ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

-    if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
-        return NULL;
-
     printk_once(KERN_INFO "%s", mlx5_version);

     dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
···
         (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
         (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
         (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+        (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+        (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
         (1ull << IB_USER_VERBS_CMD_REG_MR) |
         (1ull << IB_USER_VERBS_CMD_REREG_MR) |
         (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
···
     dev->ib_dev.uverbs_ex_cmd_mask =
         (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
         (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
-        (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+        (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
+        (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);

     dev->ib_dev.query_device = mlx5_ib_query_device;
     dev->ib_dev.query_port = mlx5_ib_query_port;
···
     spin_lock_init(&dev->reset_flow_resource_lock);

     if (ll == IB_LINK_LAYER_ETHERNET) {
-        err = mlx5_enable_roce(dev);
+        err = mlx5_enable_eth(dev);
         if (err)
             goto err_dealloc;
     }

     err = create_dev_resources(&dev->devr);
     if (err)
-        goto err_disable_roce;
+        goto err_disable_eth;

     err = mlx5_ib_odp_init_one(dev);
     if (err)
···
 err_rsrc:
     destroy_dev_resources(&dev->devr);

-err_disable_roce:
+err_disable_eth:
     if (ll == IB_LINK_LAYER_ETHERNET) {
-        mlx5_disable_roce(dev);
-        mlx5_remove_roce_notifier(dev);
+        mlx5_disable_eth(dev);
+        mlx5_remove_netdev_notifier(dev);
     }

 err_free_port:
···
     struct mlx5_ib_dev *dev = context;
     enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);

-    mlx5_remove_roce_notifier(dev);
+    mlx5_remove_netdev_notifier(dev);
     ib_unregister_device(&dev->ib_dev);
     mlx5_ib_dealloc_q_counters(dev);
     destroy_umrc_res(dev);
     mlx5_ib_odp_remove_one(dev);
     destroy_dev_resources(&dev->devr);
     if (ll == IB_LINK_LAYER_ETHERNET)
-        mlx5_disable_roce(dev);
+        mlx5_disable_eth(dev);
     kfree(dev->port);
     ib_dealloc_device(&dev->ib_dev);
 }
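All of the new response fields in the query-device hunk are gated on field_avail(), the pattern mlx5 uses for forward- and backward-compatible extension: a field is copied out only if the user buffer is long enough to hold it, and response_length grows to tell userspace what was filled. The in-tree macro is essentially:

    #define field_avail(typ, fld, sz) \
            (offsetof(typ, fld) + sizeof(((typ *)0)->fld) <= (sz))

so an old library with a short output buffer simply never sees fields it does not know about.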
+6 -1
drivers/infiniband/hw/mlx5/mem.c
···

 /* @umem: umem object to scan
  * @addr: ib virtual address requested by the user
+ * @max_page_shift: high limit for page_shift - 0 means no limit
  * @count: number of PAGE_SIZE pages covered by umem
  * @shift: page shift for the compound pages found in the region
  * @ncont: number of compund pages
  * @order: log2 of the number of compound pages
  */
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+                        unsigned long max_page_shift,
+                        int *count, int *shift,
                         int *ncont, int *order)
 {
     unsigned long tmp;
···
     addr = addr >> page_shift;
     tmp = (unsigned long)addr;
     m = find_first_bit(&tmp, BITS_PER_LONG);
+    if (max_page_shift)
+        m = min_t(unsigned long, max_page_shift - page_shift, m);
     skip = 1 << m;
     mask = skip - 1;
     i = 0;
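Worked example of the new clamp: if the region's alignment would allow 2 MB compound pages (page_shift 12 plus m = 9) but the caller passes max_page_shift = 18, m is reduced to 18 - 12 = 6 and the region is described with 256 KB pages instead; max_page_shift = 0 keeps the old unbounded behaviour:

    m = find_first_bit(&tmp, BITS_PER_LONG);    /* alignment-derived */
    if (max_page_shift)
            m = min_t(unsigned long, max_page_shift - page_shift, m);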
+9 -3
drivers/infiniband/hw/mlx5/mlx5_ib.h
···
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)

+#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+
 enum {
     MLX5_IB_MMAP_CMD_SHIFT = 8,
     MLX5_IB_MMAP_CMD_MASK = 0xff,
···
     struct list_head qps_list;
     struct list_head cq_recv_list;
     struct list_head cq_send_list;
+    u32 rate_limit;
 };

 struct mlx5_ib_cq_buf {
···
     struct ib_pd *pd;
     unsigned int page_shift;
     unsigned int npages;
-    u32 length;
+    u64 length;
     int access_flags;
     u32 mkey;
 };
···
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
                  u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                  const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                struct ib_udata *udata);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx5_ib_destroy_ah(struct ib_ah *ah);
 struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
···
                          struct ib_port_attr *props);
 int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+                        unsigned long max_page_shift,
+                        int *count, int *shift,
                         int *ncont, int *order);
 void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                             int page_shift, size_t offset, size_t num_pages,
+52 -19
drivers/infiniband/hw/mlx5/mr.c
··· 627 627 ent->order = i + 2;
628 628 ent->dev = dev;
629 629
630 - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
630 + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
631 + (mlx5_core_is_pf(dev->mdev)))
631 632 limit = dev->mdev->profile->mr_cache[i].limit;
632 633 else
633 634 limit = 0;
··· 646 645 return 0;
647 646 }
648 647
648 + static void wait_for_async_commands(struct mlx5_ib_dev *dev)
649 + {
650 + struct mlx5_mr_cache *cache = &dev->cache;
651 + struct mlx5_cache_ent *ent;
652 + int total = 0;
653 + int i;
654 + int j;
655 +
656 + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
657 + ent = &cache->ent[i];
658 + for (j = 0 ; j < 1000; j++) {
659 + if (!ent->pending)
660 + break;
661 + msleep(50);
662 + }
663 + }
664 + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
665 + ent = &cache->ent[i];
666 + total += ent->pending;
667 + }
668 +
669 + if (total)
670 + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
671 + else
672 + mlx5_ib_warn(dev, "done with all pending requests\n");
673 + }
674 +
649 675 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
650 676 {
651 677 int i;
··· 686 658 clean_keys(dev, i);
687 659
688 660 destroy_workqueue(dev->cache.wq);
661 + wait_for_async_commands(dev);
689 662 del_timer_sync(&dev->delay_timer);
690 663
691 664 return 0;
··· 844 815 umrwr->mkey = key;
845 816 }
846 817
847 - static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
848 - int access_flags, int *npages,
849 - int *page_shift, int *ncont, int *order)
818 + static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
819 + int access_flags, struct ib_umem **umem,
820 + int *npages, int *page_shift, int *ncont,
821 + int *order)
850 822 {
851 823 struct mlx5_ib_dev *dev = to_mdev(pd->device);
852 - struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
853 - access_flags, 0);
854 - if (IS_ERR(umem)) {
824 + int err;
825 +
826 + *umem = ib_umem_get(pd->uobject->context, start, length,
827 + access_flags, 0);
828 + err = PTR_ERR_OR_ZERO(*umem);
829 + if (err < 0) {
855 - mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
830 + mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(*umem));
856 - return (void *)umem;
831 + return err;
857 832 }
858 833
859 - mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
834 + mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
835 + page_shift, ncont, order);
860 836 if (!*npages) {
861 837 mlx5_ib_warn(dev, "avoid zero region\n");
862 - ib_umem_release(umem);
863 - return ERR_PTR(-EINVAL);
838 + ib_umem_release(*umem);
839 + return -EINVAL;
864 840 }
865 841
866 842 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
867 843 *npages, *ncont, *order, *page_shift);
868 844
869 - return umem;
845 + return 0;
870 846 }
871 847
872 848 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
··· 1197 1163
1198 1164 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1199 1165 start, virt_addr, length, access_flags);
1200 - umem = mr_umem_get(pd, start, length, access_flags, &npages,
1166 + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
1201 1167 &page_shift, &ncont, &order);
1202 1168
1203 - if (IS_ERR(umem))
1204 - return (void *)umem;
1169 + if (err < 0)
1170 + return ERR_PTR(err);
1205 1171
1206 1172 if (use_umr(order)) {
1207 1173 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
··· 1375 1341 */
1376 1342 flags |= IB_MR_REREG_TRANS;
1377 1343 ib_umem_release(mr->umem);
1378 - mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
1379 - &page_shift, &ncont, &order);
1380 - if (IS_ERR(mr->umem)) {
1381 - err = PTR_ERR(mr->umem);
1344 + err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
1345 + &npages, &page_shift, &ncont, &order);
1346 + if (err < 0) {
1382 1347 mr->umem = NULL;
1383 1348 return err;
1384 1349 }
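The mr.c hunks above change mr_umem_get() from returning an ERR_PTR-encoded struct ib_umem * to returning a plain int and handing the umem back through an out-parameter, which lets the callers in reg_user_mr and rereg_user_mr drop their (void *) casts. A minimal sketch of the new calling convention, with a hypothetical caller body for illustration:

	struct ib_umem *umem;
	int npages, page_shift, ncont, order;
	int err;

	err = mr_umem_get(pd, start, length, access_flags, &umem,
			  &npages, &page_shift, &ncont, &order);
	if (err < 0)
		return ERR_PTR(err);	/* translate to ERR_PTR only at the ib_mr boundary */

Keeping the error as an int until the last moment also gives the compiler a clear view of the umem's initialization, which is the usual cure for spurious -Wmaybe-uninitialized warnings around ERR_PTR round-trips.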
+108 -27
drivers/infiniband/hw/mlx5/qp.c
··· 78 78 79 79 enum raw_qp_set_mask_map { 80 80 MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, 81 + MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, 81 82 }; 82 83 83 84 struct mlx5_modify_raw_qp_param { 84 85 u16 operation; 85 86 86 87 u32 set_mask; /* raw_qp_set_mask_map */ 88 + u32 rate_limit; 87 89 u8 rq_q_ctr_id; 88 90 }; 89 91 ··· 354 352 return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); 355 353 } 356 354 355 + static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) 356 + { 357 + int max_sge; 358 + 359 + if (attr->qp_type == IB_QPT_RC) 360 + max_sge = (min_t(int, wqe_size, 512) - 361 + sizeof(struct mlx5_wqe_ctrl_seg) - 362 + sizeof(struct mlx5_wqe_raddr_seg)) / 363 + sizeof(struct mlx5_wqe_data_seg); 364 + else if (attr->qp_type == IB_QPT_XRC_INI) 365 + max_sge = (min_t(int, wqe_size, 512) - 366 + sizeof(struct mlx5_wqe_ctrl_seg) - 367 + sizeof(struct mlx5_wqe_xrc_seg) - 368 + sizeof(struct mlx5_wqe_raddr_seg)) / 369 + sizeof(struct mlx5_wqe_data_seg); 370 + else 371 + max_sge = (wqe_size - sq_overhead(attr)) / 372 + sizeof(struct mlx5_wqe_data_seg); 373 + 374 + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / 375 + sizeof(struct mlx5_wqe_data_seg)); 376 + } 377 + 357 378 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, 358 379 struct mlx5_ib_qp *qp) 359 380 { ··· 407 382 wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); 408 383 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; 409 384 if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { 410 - mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", 385 + mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n", 386 + attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB, 411 387 qp->sq.wqe_cnt, 412 388 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); 413 389 return -ENOMEM; 414 390 } 415 391 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); 416 - qp->sq.max_gs = attr->cap.max_send_sge; 392 + qp->sq.max_gs = get_send_sge(attr, wqe_size); 393 + if (qp->sq.max_gs < attr->cap.max_send_sge) 394 + return -ENOMEM; 395 + 396 + attr->cap.max_send_sge = qp->sq.max_gs; 417 397 qp->sq.max_post = wq_size / wqe_size; 418 398 attr->cap.max_send_wr = qp->sq.max_post; 419 399 ··· 678 648 return PTR_ERR(*umem); 679 649 } 680 650 681 - mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); 651 + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); 682 652 683 653 err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); 684 654 if (err) { ··· 731 701 return err; 732 702 } 733 703 734 - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, 704 + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, 735 705 &ncont, NULL); 736 706 err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, 737 707 &rwq->rq_page_offset); ··· 2473 2443 } 2474 2444 2475 2445 static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, 2476 - struct mlx5_ib_sq *sq, int new_state) 2446 + struct mlx5_ib_sq *sq, 2447 + int new_state, 2448 + const struct mlx5_modify_raw_qp_param *raw_qp_param) 2477 2449 { 2450 + struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; 2451 + u32 old_rate = ibqp->rate_limit; 2452 + u32 new_rate = old_rate; 2453 + u16 rl_index = 0; 2478 2454 void *in; 2479 2455 void *sqc; 2480 2456 int inlen; ··· 2496 2460 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 2497 2461 MLX5_SET(sqc, sqc, state, new_state); 2498 2462 2499 - err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); 2500 - if (err) 2501 - goto out; 2463 + if 
(raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) { 2464 + if (new_state != MLX5_SQC_STATE_RDY) 2465 + pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", 2466 + __func__); 2467 + else 2468 + new_rate = raw_qp_param->rate_limit; 2469 + } 2502 2470 2471 + if (old_rate != new_rate) { 2472 + if (new_rate) { 2473 + err = mlx5_rl_add_rate(dev, new_rate, &rl_index); 2474 + if (err) { 2475 + pr_err("Failed configuring rate %u: %d\n", 2476 + new_rate, err); 2477 + goto out; 2478 + } 2479 + } 2480 + 2481 + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); 2482 + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); 2483 + } 2484 + 2485 + err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); 2486 + if (err) { 2487 + /* Remove new rate from table if failed */ 2488 + if (new_rate && 2489 + old_rate != new_rate) 2490 + mlx5_rl_remove_rate(dev, new_rate); 2491 + goto out; 2492 + } 2493 + 2494 + /* Only remove the old rate after new rate was set */ 2495 + if ((old_rate && 2496 + (old_rate != new_rate)) || 2497 + (new_state != MLX5_SQC_STATE_RDY)) 2498 + mlx5_rl_remove_rate(dev, old_rate); 2499 + 2500 + ibqp->rate_limit = new_rate; 2503 2501 sq->state = new_state; 2504 2502 2505 2503 out: ··· 2548 2478 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; 2549 2479 struct mlx5_ib_rq *rq = &raw_packet_qp->rq; 2550 2480 struct mlx5_ib_sq *sq = &raw_packet_qp->sq; 2481 + int modify_rq = !!qp->rq.wqe_cnt; 2482 + int modify_sq = !!qp->sq.wqe_cnt; 2551 2483 int rq_state; 2552 2484 int sq_state; 2553 2485 int err; ··· 2567 2495 rq_state = MLX5_RQC_STATE_RST; 2568 2496 sq_state = MLX5_SQC_STATE_RST; 2569 2497 break; 2570 - case MLX5_CMD_OP_INIT2INIT_QP: 2571 - case MLX5_CMD_OP_INIT2RTR_QP: 2572 2498 case MLX5_CMD_OP_RTR2RTS_QP: 2573 2499 case MLX5_CMD_OP_RTS2RTS_QP: 2500 + if (raw_qp_param->set_mask == 2501 + MLX5_RAW_QP_RATE_LIMIT) { 2502 + modify_rq = 0; 2503 + sq_state = sq->state; 2504 + } else { 2505 + return raw_qp_param->set_mask ? 
-EINVAL : 0; 2506 + } 2507 + break; 2508 + case MLX5_CMD_OP_INIT2INIT_QP: 2509 + case MLX5_CMD_OP_INIT2RTR_QP: 2574 2510 if (raw_qp_param->set_mask) 2575 2511 return -EINVAL; 2576 2512 else ··· 2588 2508 return -EINVAL; 2589 2509 } 2590 2510 2591 - if (qp->rq.wqe_cnt) { 2592 - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); 2511 + if (modify_rq) { 2512 + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); 2593 2513 if (err) 2594 2514 return err; 2595 2515 } 2596 2516 2597 - if (qp->sq.wqe_cnt) { 2517 + if (modify_sq) { 2598 2518 if (tx_affinity) { 2599 2519 err = modify_raw_packet_tx_affinity(dev->mdev, sq, 2600 2520 tx_affinity); ··· 2602 2522 return err; 2603 2523 } 2604 2524 2605 - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); 2525 + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); 2606 2526 } 2607 2527 2608 2528 return 0; ··· 2658 2578 struct mlx5_ib_port *mibport = NULL; 2659 2579 enum mlx5_qp_state mlx5_cur, mlx5_new; 2660 2580 enum mlx5_qp_optpar optpar; 2661 - int sqd_event; 2662 2581 int mlx5_st; 2663 2582 int err; 2664 2583 u16 op; ··· 2804 2725 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 2805 2726 context->db_rec_addr = cpu_to_be64(qp->db.dma); 2806 2727 2807 - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 2808 - attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) 2809 - sqd_event = 1; 2810 - else 2811 - sqd_event = 0; 2812 - 2813 2728 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 2814 2729 u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : 2815 2730 qp->port) - 1; ··· 2850 2777 raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; 2851 2778 raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; 2852 2779 } 2780 + 2781 + if (attr_mask & IB_QP_RATE_LIMIT) { 2782 + raw_qp_param.rate_limit = attr->rate_limit; 2783 + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; 2784 + } 2785 + 2853 2786 err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); 2854 2787 } else { 2855 2788 err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, ··· 3147 3068 { 3148 3069 memset(umr, 0, sizeof(*umr)); 3149 3070 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 3150 - umr->flags = 1 << 7; 3071 + umr->flags = MLX5_UMR_INLINE; 3151 3072 } 3152 3073 3153 - static __be64 get_umr_reg_mr_mask(void) 3074 + static __be64 get_umr_reg_mr_mask(int atomic) 3154 3075 { 3155 3076 u64 result; 3156 3077 ··· 3163 3084 MLX5_MKEY_MASK_KEY | 3164 3085 MLX5_MKEY_MASK_RR | 3165 3086 MLX5_MKEY_MASK_RW | 3166 - MLX5_MKEY_MASK_A | 3167 3087 MLX5_MKEY_MASK_FREE; 3088 + 3089 + if (atomic) 3090 + result |= MLX5_MKEY_MASK_A; 3168 3091 3169 3092 return cpu_to_be64(result); 3170 3093 } ··· 3228 3147 } 3229 3148 3230 3149 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 3231 - struct ib_send_wr *wr) 3150 + struct ib_send_wr *wr, int atomic) 3232 3151 { 3233 3152 struct mlx5_umr_wr *umrwr = umr_wr(wr); 3234 3153 ··· 3253 3172 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) 3254 3173 umr->mkey_mask |= get_umr_update_pd_mask(); 3255 3174 if (!umr->mkey_mask) 3256 - umr->mkey_mask = get_umr_reg_mr_mask(); 3175 + umr->mkey_mask = get_umr_reg_mr_mask(atomic); 3257 3176 } else { 3258 3177 umr->mkey_mask = get_umr_unreg_mr_mask(); 3259 3178 } ··· 4106 4025 } 4107 4026 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 4108 4027 ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); 4109 - set_reg_umr_segment(seg, wr); 4028 + set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); 4110 4029 seg 
+= sizeof(struct mlx5_wqe_umr_ctrl_seg); 4111 4030 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 4112 4031 if (unlikely((seg == qend)))
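In the qp.c hunk, calc_sq_size() now derives max_gs from the new get_send_sge() helper and fails QP creation with -ENOMEM when the requested max_send_sge cannot fit in a WQE, instead of echoing the request back unchecked. The arithmetic, reduced to a standalone sketch for the RC case; the 16-byte ctrl/raddr/data segment sizes and the 512-byte cap are written out for illustration, and note that the overhead must be subtracted before the division, i.e. (wqe_size - overhead) / seg_size:

#include <stdio.h>

#define SEG 16	/* mlx5 ctrl, raddr and data segments are all 16 bytes */

static int rc_max_send_sge(int wqe_size)
{
	int cap = wqe_size < 512 ? wqe_size : 512;	/* helper caps the usable area at 512B */
	int max_sge = (cap - SEG /* ctrl */ - SEG /* raddr */) / SEG;
	int fit = (wqe_size - 2 * SEG) / SEG;	/* parenthesized before dividing, on purpose */

	return max_sge < fit ? max_sge : fit;
}

int main(void)
{
	printf("a 192B RC send WQE carries at most %d SGEs\n",
	       rc_max_send_sge(192));	/* (192 - 32) / 16 = 10 */
	return 0;
}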
+2 -2
drivers/infiniband/hw/mlx5/srq.c
··· 118 118 return err; 119 119 } 120 120 121 - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, 121 + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, 122 122 &page_shift, &ncont, NULL); 123 123 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, 124 124 &offset); ··· 280 280 mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", 281 281 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, 282 282 srq->msrq.max_avail_gather); 283 + in.type = init_attr->srq_type; 283 284 284 285 if (pd->uobject) 285 286 err = create_srq_user(pd, srq, &in, udata, buf_size); ··· 293 292 goto err_srq; 294 293 } 295 294 296 - in.type = init_attr->srq_type; 297 295 in.log_size = ilog2(srq->msrq.max); 298 296 in.wqe_shift = srq->msrq.wqe_shift - 4; 299 297 if (srq->wq_sig)
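The srq.c change is an ordering fix: in.type is now assigned before create_srq_user()/the kernel-path helper run rather than after, so code that branches on the SRQ type sees the real value instead of a zeroed field. The rule, in the shape of the hunk above (the kernel-path call is elided from the hunk and shown here as an assumption):

	in.type = init_attr->srq_type;	/* publish before any consumer runs */
	if (pd->uobject)
		err = create_srq_user(pd, srq, &in, udata, buf_size);
	else
		err = create_srq_kernel(dev, srq, &in, buf_size);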
+2 -4
drivers/infiniband/hw/mthca/mthca_av.c
··· 186 186 187 187 on_hca_fail: 188 188 if (ah->type == MTHCA_AH_PCI_POOL) { 189 - ah->av = pci_pool_alloc(dev->av_table.pool, 190 - GFP_ATOMIC, &ah->avdma); 189 + ah->av = pci_pool_zalloc(dev->av_table.pool, 190 + GFP_ATOMIC, &ah->avdma); 191 191 if (!ah->av) 192 192 return -ENOMEM; 193 193 ··· 195 195 } 196 196 197 197 ah->key = pd->ntmr.ibmr.lkey; 198 - 199 - memset(av, 0, MTHCA_AV_SIZE); 200 198 201 199 av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); 202 200 av->g_slid = ah_attr->src_path_bits;
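pci_pool_zalloc() hands back pool memory that is already zeroed, which is why the mthca hunk can drop the later memset(av, 0, MTHCA_AV_SIZE) along with the plain allocation. The conversion pattern in isolation (pool and variables hypothetical):

	void *av;
	dma_addr_t dma;

	av = pci_pool_zalloc(pool, GFP_ATOMIC, &dma);	/* alloc + clear in one call */
	if (!av)
		return -ENOMEM;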
+3 -1
drivers/infiniband/hw/mthca/mthca_provider.c
··· 410 410 } 411 411 412 412 static struct ib_ah *mthca_ah_create(struct ib_pd *pd, 413 - struct ib_ah_attr *ah_attr) 413 + struct ib_ah_attr *ah_attr, 414 + struct ib_udata *udata) 415 + 414 416 { 415 417 int err; 416 418 struct mthca_ah *ah;
+2 -1
drivers/infiniband/hw/nes/nes_verbs.c
··· 771 771 /** 772 772 * nes_create_ah 773 773 */ 774 - static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) 774 + static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, 775 + struct ib_udata *udata) 775 776 { 776 777 return ERR_PTR(-ENOSYS); 777 778 }
+2 -1
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
··· 154 154 return status; 155 155 } 156 156 157 - struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) 157 + struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, 158 + struct ib_udata *udata) 158 159 { 159 160 u32 *ahid_addr; 160 161 int status;
+3 -1
drivers/infiniband/hw/ocrdma/ocrdma_ah.h
··· 50 50 OCRDMA_AH_L3_TYPE_MASK = 0x03, 51 51 OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ 52 52 }; 53 - struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); 53 + 54 + struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *, 55 + struct ib_udata *); 54 56 int ocrdma_destroy_ah(struct ib_ah *); 55 57 int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); 56 58 int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+2 -1
drivers/infiniband/hw/qedr/verbs.c
··· 2094 2094 return rc; 2095 2095 } 2096 2096 2097 - struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) 2097 + struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, 2098 + struct ib_udata *udata) 2098 2099 { 2099 2100 struct qedr_ah *ah; 2100 2101
+2 -1
drivers/infiniband/hw/qedr/verbs.h
··· 70 70 int qp_attr_mask, struct ib_qp_init_attr *); 71 71 int qedr_destroy_qp(struct ib_qp *ibqp); 72 72 73 - struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); 73 + struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, 74 + struct ib_udata *udata); 74 75 int qedr_destroy_ah(struct ib_ah *ibah); 75 76 76 77 int qedr_dereg_mr(struct ib_mr *);
+3 -1
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 738 738 739 739 /* In ib callbacks section - Start of stub funcs */ 740 740 struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, 741 - struct ib_ah_attr *ah_attr) 741 + struct ib_ah_attr *ah_attr, 742 + struct ib_udata *udata) 743 + 742 744 { 743 745 usnic_dbg("\n"); 744 746 return ERR_PTR(-EPERM);
+3 -1
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
··· 75 75 int usnic_ib_mmap(struct ib_ucontext *context, 76 76 struct vm_area_struct *vma); 77 77 struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, 78 - struct ib_ah_attr *ah_attr); 78 + struct ib_ah_attr *ah_attr, 79 + struct ib_udata *udata); 80 + 79 81 int usnic_ib_destroy_ah(struct ib_ah *ah); 80 82 int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 81 83 struct ib_send_wr **bad_wr);
+1 -1
drivers/infiniband/sw/rxe/rxe_param.h
··· 82 82 RXE_MAX_SGE = 32, 83 83 RXE_MAX_SGE_RD = 32, 84 84 RXE_MAX_CQ = 16384, 85 - RXE_MAX_LOG_CQE = 13, 85 + RXE_MAX_LOG_CQE = 15, 86 86 RXE_MAX_MR = 2 * 1024, 87 87 RXE_MAX_PD = 0x7ffc, 88 88 RXE_MAX_QP_RD_ATOM = 128,
+3 -1
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 316 316 return err; 317 317 } 318 318 319 - static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) 319 + static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, 320 + struct ib_udata *udata) 321 + 320 322 { 321 323 int err; 322 324 struct rxe_dev *rxe = to_rdev(ibpd->device);
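The provider hunks above (mthca, nes, ocrdma, qedr, usnic, rxe) all track one core change: the create_ah verb now receives a struct ib_udata * so a driver can exchange provider-private data with userspace at AH creation time, as mlx5 does with the DMAC it returns (see mlx5_ib_create_ah_resp at the end of this merge). A driver with no user channel simply ignores the new argument; a stub under a hypothetical driver name:

static struct ib_ah *foo_create_ah(struct ib_pd *pd,
				   struct ib_ah_attr *ah_attr,
				   struct ib_udata *udata)
{
	/* nothing to exchange with userspace for this device */
	return ERR_PTR(-ENOSYS);
}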
-2
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 1050 1050 1051 1051 tx_qp = ib_create_qp(priv->pd, &attr); 1052 1052 if (PTR_ERR(tx_qp) == -EINVAL) { 1053 - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", 1054 - priv->ca->name); 1055 1053 attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; 1056 1054 tx_qp = ib_create_qp(priv->pd, &attr); 1057 1055 }
+3 -2
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
··· 1605 1605 r->com.from_state = r->com.state; 1606 1606 r->com.to_state = state; 1607 1607 r->com.state = RES_EQ_BUSY; 1608 - if (eq) 1609 - *eq = r; 1610 1608 } 1611 1609 } 1612 1610 1613 1611 spin_unlock_irq(mlx4_tlock(dev)); 1612 + 1613 + if (!err && eq) 1614 + *eq = r; 1614 1615 1615 1616 return err; 1616 1617 }
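The resource_tracker.c fix narrows when the eq out-parameter is written: the result is published only after the locked state transition is known to have succeeded, and outside the spinlock, so a failed grab can no longer leak a half-claimed pointer to the caller. The pattern, sketched with hypothetical types and helpers:

static int grab_eq(struct tracker *t, int id, struct res_eq **eq)
{
	struct res_eq *r;
	int err;

	spin_lock_irq(&t->lock);
	r = lookup_eq(t, id);
	err = r ? begin_transition(r) : -ENOENT;
	spin_unlock_irq(&t->lock);

	if (!err && eq)
		*eq = r;	/* out-params only carry results of successful calls */
	return err;
}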
+1 -1
include/linux/mlx5/mlx5_ifc.h
··· 576 576 u8 self_lb_en_modifiable[0x1]; 577 577 u8 reserved_at_9[0x2]; 578 578 u8 max_lso_cap[0x5]; 579 - u8 reserved_at_10[0x2]; 579 + u8 multi_pkt_send_wqe[0x2]; 580 580 u8 wqe_inline_mode[0x2]; 581 581 u8 rss_ind_tbl_cap[0x4]; 582 582 u8 reg_umr_sq[0x1];
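Carving multi_pkt_send_wqe out of reserved_at_10 gives those two bits a name the MLX5_CAP_* accessor macros can resolve; since the field sits in per_protocol_networking_offload_caps, reading it presumably goes through MLX5_CAP_ETH(). A sketch of the capability check that would feed the new mlx5_ib_support_multi_pkt_send_wqes response field shown later in this merge:

	if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
		resp.mlx5_ib_support_multi_pkt_send_wqes = 1;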
+56 -14
include/rdma/ib_verbs.h
··· 1102 1102 IB_QP_RESERVED2 = (1<<22), 1103 1103 IB_QP_RESERVED3 = (1<<23), 1104 1104 IB_QP_RESERVED4 = (1<<24), 1105 + IB_QP_RATE_LIMIT = (1<<25), 1105 1106 }; 1106 1107 1107 1108 enum ib_qp_state { ··· 1152 1151 u8 rnr_retry; 1153 1152 u8 alt_port_num; 1154 1153 u8 alt_timeout; 1154 + u32 rate_limit; 1155 1155 }; 1156 1156 1157 1157 enum ib_wr_opcode { ··· 1594 1592 /* Supported steering header types */ 1595 1593 enum ib_flow_spec_type { 1596 1594 /* L2 headers*/ 1597 - IB_FLOW_SPEC_ETH = 0x20, 1598 - IB_FLOW_SPEC_IB = 0x22, 1595 + IB_FLOW_SPEC_ETH = 0x20, 1596 + IB_FLOW_SPEC_IB = 0x22, 1599 1597 /* L3 header*/ 1600 - IB_FLOW_SPEC_IPV4 = 0x30, 1601 - IB_FLOW_SPEC_IPV6 = 0x31, 1598 + IB_FLOW_SPEC_IPV4 = 0x30, 1599 + IB_FLOW_SPEC_IPV6 = 0x31, 1602 1600 /* L4 headers*/ 1603 - IB_FLOW_SPEC_TCP = 0x40, 1604 - IB_FLOW_SPEC_UDP = 0x41 1601 + IB_FLOW_SPEC_TCP = 0x40, 1602 + IB_FLOW_SPEC_UDP = 0x41, 1603 + IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50, 1604 + IB_FLOW_SPEC_INNER = 0x100, 1605 1605 }; 1606 1606 #define IB_FLOW_SPEC_LAYER_MASK 0xF0 1607 - #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 1607 + #define IB_FLOW_SPEC_SUPPORT_LAYERS 8 1608 1608 1609 1609 /* Flow steering rule priority is set according to it's domain. 1610 1610 * Lower domain value means higher priority. ··· 1634 1630 }; 1635 1631 1636 1632 struct ib_flow_spec_eth { 1637 - enum ib_flow_spec_type type; 1633 + u32 type; 1638 1634 u16 size; 1639 1635 struct ib_flow_eth_filter val; 1640 1636 struct ib_flow_eth_filter mask; ··· 1648 1644 }; 1649 1645 1650 1646 struct ib_flow_spec_ib { 1651 - enum ib_flow_spec_type type; 1647 + u32 type; 1652 1648 u16 size; 1653 1649 struct ib_flow_ib_filter val; 1654 1650 struct ib_flow_ib_filter mask; ··· 1673 1669 }; 1674 1670 1675 1671 struct ib_flow_spec_ipv4 { 1676 - enum ib_flow_spec_type type; 1672 + u32 type; 1677 1673 u16 size; 1678 1674 struct ib_flow_ipv4_filter val; 1679 1675 struct ib_flow_ipv4_filter mask; ··· 1691 1687 }; 1692 1688 1693 1689 struct ib_flow_spec_ipv6 { 1694 - enum ib_flow_spec_type type; 1690 + u32 type; 1695 1691 u16 size; 1696 1692 struct ib_flow_ipv6_filter val; 1697 1693 struct ib_flow_ipv6_filter mask; ··· 1705 1701 }; 1706 1702 1707 1703 struct ib_flow_spec_tcp_udp { 1708 - enum ib_flow_spec_type type; 1704 + u32 type; 1709 1705 u16 size; 1710 1706 struct ib_flow_tcp_udp_filter val; 1711 1707 struct ib_flow_tcp_udp_filter mask; 1712 1708 }; 1713 1709 1710 + struct ib_flow_tunnel_filter { 1711 + __be32 tunnel_id; 1712 + u8 real_sz[0]; 1713 + }; 1714 + 1715 + /* ib_flow_spec_tunnel describes the Vxlan tunnel 1716 + * the tunnel_id from val has the vni value 1717 + */ 1718 + struct ib_flow_spec_tunnel { 1719 + u32 type; 1720 + u16 size; 1721 + struct ib_flow_tunnel_filter val; 1722 + struct ib_flow_tunnel_filter mask; 1723 + }; 1724 + 1714 1725 union ib_flow_spec { 1715 1726 struct { 1716 - enum ib_flow_spec_type type; 1727 + u32 type; 1717 1728 u16 size; 1718 1729 }; 1719 1730 struct ib_flow_spec_eth eth; ··· 1736 1717 struct ib_flow_spec_ipv4 ipv4; 1737 1718 struct ib_flow_spec_tcp_udp tcp_udp; 1738 1719 struct ib_flow_spec_ipv6 ipv6; 1720 + struct ib_flow_spec_tunnel tunnel; 1739 1721 }; 1740 1722 1741 1723 struct ib_flow_attr { ··· 1953 1933 struct ib_udata *udata); 1954 1934 int (*dealloc_pd)(struct ib_pd *pd); 1955 1935 struct ib_ah * (*create_ah)(struct ib_pd *pd, 1956 - struct ib_ah_attr *ah_attr); 1936 + struct ib_ah_attr *ah_attr, 1937 + struct ib_udata *udata); 1957 1938 int (*modify_ah)(struct ib_ah *ah, 1958 1939 struct ib_ah_attr *ah_attr); 1959 1940 int 
(*query_ah)(struct ib_ah *ah, ··· 2600 2579 * in all UD QP post sends. 2601 2580 */ 2602 2581 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); 2582 + 2583 + /** 2584 + * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header 2585 + * work completion. 2586 + * @hdr: the L3 header to parse 2587 + * @net_type: type of header to parse 2588 + * @sgid: place to store source gid 2589 + * @dgid: place to store destination gid 2590 + */ 2591 + int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, 2592 + enum rdma_network_type net_type, 2593 + union ib_gid *sgid, union ib_gid *dgid); 2594 + 2595 + /** 2596 + * ib_get_rdma_header_version - Get the header version 2597 + * @hdr: the L3 header to parse 2598 + */ 2599 + int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); 2603 2600 2604 2601 /** 2605 2602 * ib_init_ah_from_wc - Initializes address handle attributes from a ··· 3396 3357 void ib_drain_rq(struct ib_qp *qp); 3397 3358 void ib_drain_sq(struct ib_qp *qp); 3398 3359 void ib_drain_qp(struct ib_qp *qp); 3360 + 3361 + int ib_resolve_eth_dmac(struct ib_device *device, 3362 + struct ib_ah_attr *ah_attr); 3399 3363 #endif /* IB_VERBS_H */
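With IB_QP_RATE_LIMIT and ib_qp_attr.rate_limit in place, a kernel verbs consumer caps a QP's send rate through the ordinary modify path; per the mlx5 packet-pacing caps exported at the end of this merge, the unit is kbps. A minimal sketch, error handling reduced to the essentials:

	struct ib_qp_attr attr = {};
	int err;

	attr.rate_limit = 100000;	/* kbps, i.e. roughly 100 Mb/s */
	err = ib_modify_qp(qp, &attr, IB_QP_RATE_LIMIT);
	if (err)
		pr_warn("rate limit rejected: %d\n", err);

On mlx5 the limit only takes effect when the underlying SQ is moving to the ready state, which is why modify_raw_packet_qp_sq above warns and ignores the rate otherwise.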
+38
include/uapi/rdma/ib_user_verbs.h
··· 37 37 #define IB_USER_VERBS_H 38 38 39 39 #include <linux/types.h> 40 + #include <rdma/ib_verbs.h> 40 41 41 42 /* 42 43 * Increment this value if any changes that break userspace ABI ··· 94 93 IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, 95 94 IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, 96 95 IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, 96 + IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP, 97 97 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 98 98 IB_USER_VERBS_EX_CMD_DESTROY_FLOW, 99 99 IB_USER_VERBS_EX_CMD_CREATE_WQ, ··· 547 545 IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, 548 546 }; 549 547 548 + enum { 549 + IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN 550 + }; 551 + 552 + enum { 553 + IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT 554 + }; 555 + 550 556 struct ib_uverbs_ex_create_qp { 551 557 __u64 user_handle; 552 558 __u32 pd_handle; ··· 694 684 __u64 driver_data[0]; 695 685 }; 696 686 687 + struct ib_uverbs_ex_modify_qp { 688 + struct ib_uverbs_modify_qp base; 689 + __u32 rate_limit; 690 + __u32 reserved; 691 + }; 692 + 697 693 struct ib_uverbs_modify_qp_resp { 694 + }; 695 + 696 + struct ib_uverbs_ex_modify_qp_resp { 697 + __u32 comp_mask; 698 + __u32 response_length; 698 699 }; 699 700 700 701 struct ib_uverbs_destroy_qp { ··· 927 906 }; 928 907 struct ib_uverbs_flow_ipv6_filter val; 929 908 struct ib_uverbs_flow_ipv6_filter mask; 909 + }; 910 + 911 + struct ib_uverbs_flow_tunnel_filter { 912 + __be32 tunnel_id; 913 + }; 914 + 915 + struct ib_uverbs_flow_spec_tunnel { 916 + union { 917 + struct ib_uverbs_flow_spec_hdr hdr; 918 + struct { 919 + __u32 type; 920 + __u16 size; 921 + __u16 reserved; 922 + }; 923 + }; 924 + struct ib_uverbs_flow_tunnel_filter val; 925 + struct ib_uverbs_flow_tunnel_filter mask; 930 926 }; 931 927 932 928 struct ib_uverbs_flow_attr {
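The extended modify_qp command wraps the legacy ABI rather than replacing it: ib_uverbs_ex_modify_qp embeds ib_uverbs_modify_qp as .base and appends rate_limit, while the IB_USER_LEGACY_LAST_QP_ATTR_MASK / IB_USER_LAST_QP_ATTR_MASK markers let the kernel reject a legacy command that smuggles in mask bits newer than IB_QP_DEST_QPN. How userspace plumbing would populate the wire struct (raw ABI, normally hidden behind libibverbs; the handle is hypothetical):

	struct ib_uverbs_ex_modify_qp cmd = {};

	cmd.base.qp_handle = qp_handle;	/* handle from the create_qp response */
	cmd.base.attr_mask = IB_QP_STATE | IB_QP_RATE_LIMIT;
	cmd.base.qp_state = IB_QPS_RTS;
	cmd.rate_limit = 50000;	/* kbps */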
+37 -1
include/uapi/rdma/mlx5-abi.h
··· 82 82
83 83 enum mlx5_user_cmds_supp_uhw {
84 84 MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
85 + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
85 86 };
86 87
87 88 struct mlx5_ib_alloc_ucontext_resp {
··· 125 124 __u8 reserved[7];
126 125 };
127 126
127 + enum mlx5_ib_cqe_comp_res_format {
128 + MLX5_IB_CQE_RES_FORMAT_HASH = 1 << 0,
129 + MLX5_IB_CQE_RES_FORMAT_CSUM = 1 << 1,
130 + MLX5_IB_CQE_RES_RESERVED = 1 << 2,
131 + };
132 +
133 + struct mlx5_ib_cqe_comp_caps {
134 + __u32 max_num;
135 + __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
136 + };
137 +
138 + struct mlx5_packet_pacing_caps {
139 + __u32 qp_rate_limit_min;
140 + __u32 qp_rate_limit_max; /* In kbps */
141 +
142 + /* Corresponding bit will be set if qp type from
143 + * 'enum ib_qp_type' is supported, e.g.
144 + * supported_qpts |= 1 << IB_QPT_RAW_PACKET
145 + */
146 + __u32 supported_qpts;
147 + __u32 reserved;
148 + };
149 +
128 150 struct mlx5_ib_query_device_resp {
129 151 __u32 comp_mask;
130 152 __u32 response_length;
131 153 struct mlx5_ib_tso_caps tso_caps;
132 154 struct mlx5_ib_rss_caps rss_caps;
155 + struct mlx5_ib_cqe_comp_caps cqe_comp_caps;
156 + struct mlx5_packet_pacing_caps packet_pacing_caps;
157 + __u32 mlx5_ib_support_multi_pkt_send_wqes;
158 + __u32 reserved;
133 159 };
134 160
135 161 struct mlx5_ib_create_cq {
136 162 __u64 buf_addr;
137 163 __u64 db_addr;
138 164 __u32 cqe_size;
139 - __u32 reserved; /* explicit padding (optional on i386) */
165 + __u8 cqe_comp_en;
166 + __u8 cqe_comp_res_format;
167 + __u16 reserved; /* explicit padding (optional on i386) */
140 168 };
141 169
142 170 struct mlx5_ib_create_cq_resp {
··· 260 230 __u32 flags;
261 231 __u32 comp_mask;
262 232 __u32 reserved;
233 + };
234 +
235 + struct mlx5_ib_create_ah_resp {
236 + __u32 response_length;
237 + __u8 dmac[ETH_ALEN];
238 + __u8 reserved[6];
263 239 };
264 240
265 241 struct mlx5_ib_create_wq_resp {
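Rounding out the ABI, mlx5_packet_pacing_caps travels back to userspace inside the extended query_device response, so an application can validate a rate before requesting it. A userspace sketch against the libibverbs mirror of these caps (field names follow rdma-core's ibv_packet_pacing_caps; check your verbs headers):

#include <string.h>
#include <infiniband/verbs.h>

int rate_supported(struct ibv_context *ctx, uint32_t kbps)
{
	struct ibv_device_attr_ex attr;

	memset(&attr, 0, sizeof(attr));
	if (ibv_query_device_ex(ctx, NULL, &attr))
		return 0;

	return kbps >= attr.packet_pacing_caps.qp_rate_limit_min &&
	       kbps <= attr.packet_pacing_caps.qp_rate_limit_max &&
	       (attr.packet_pacing_caps.supported_qpts &
		(1 << IBV_QPT_RAW_PACKET));
}

A device without packet pacing leaves both limits at zero, so the range check above effectively doubles as a capability test.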