Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

+2

MAINTAINERS

··· 22212 22212 VIRTIO CORE AND NET DRIVERS 22213 22213 M: "Michael S. Tsirkin" <mst@redhat.com> 22214 22214 M: Jason Wang <jasowang@redhat.com> 22215 + R: Xuan Zhuo <xuanzhuo@linux.alibaba.com> 22215 22216 L: virtualization@lists.linux-foundation.org 22216 22217 S: Maintained 22217 22218 F: Documentation/ABI/testing/sysfs-bus-vdpa ··· 22226 22225 F: drivers/virtio/ 22227 22226 F: include/linux/vdpa.h 22228 22227 F: include/linux/virtio*.h 22228 + F: include/linux/vringh.h 22229 22229 F: include/uapi/linux/virtio_*.h 22230 22230 F: tools/virtio/ 22231 22231

+19 -3

drivers/s390/virtio/virtio_ccw.c

··· 391 391 ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area)); 392 392 } 393 393 394 - static bool virtio_ccw_kvm_notify(struct virtqueue *vq) 394 + static inline bool virtio_ccw_do_kvm_notify(struct virtqueue *vq, u32 data) 395 395 { 396 396 struct virtio_ccw_vq_info *info = vq->priv; 397 397 struct virtio_ccw_device *vcdev; ··· 402 402 BUILD_BUG_ON(sizeof(struct subchannel_id) != sizeof(unsigned int)); 403 403 info->cookie = kvm_hypercall3(KVM_S390_VIRTIO_CCW_NOTIFY, 404 404 *((unsigned int *)&schid), 405 - vq->index, info->cookie); 405 + data, info->cookie); 406 406 if (info->cookie < 0) 407 407 return false; 408 408 return true; 409 + } 410 + 411 + static bool virtio_ccw_kvm_notify(struct virtqueue *vq) 412 + { 413 + return virtio_ccw_do_kvm_notify(vq, vq->index); 414 + } 415 + 416 + static bool virtio_ccw_kvm_notify_with_data(struct virtqueue *vq) 417 + { 418 + return virtio_ccw_do_kvm_notify(vq, vring_notification_data(vq)); 409 419 } 410 420 411 421 static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, ··· 505 495 struct ccw1 *ccw) 506 496 { 507 497 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 498 + bool (*notify)(struct virtqueue *vq); 508 499 int err; 509 500 struct virtqueue *vq = NULL; 510 501 struct virtio_ccw_vq_info *info; 511 502 u64 queue; 512 503 unsigned long flags; 513 504 bool may_reduce; 505 + 506 + if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA)) 507 + notify = virtio_ccw_kvm_notify_with_data; 508 + else 509 + notify = virtio_ccw_kvm_notify; 514 510 515 511 /* Allocate queue. */ 516 512 info = kzalloc(sizeof(struct virtio_ccw_vq_info), GFP_KERNEL); ··· 540 524 may_reduce = vcdev->revision > 0; 541 525 vq = vring_create_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, 542 526 vdev, true, may_reduce, ctx, 543 - virtio_ccw_kvm_notify, callback, name); 527 + notify, callback, name); 544 528 545 529 if (!vq) { 546 530 /* For now, we fail if we can't get the requested size. */

+156 -105

drivers/vdpa/mlx5/net/mlx5_vnet.c

··· 778 778 return idx % 2; 779 779 } 780 780 781 - static u16 get_features_12_3(u64 features) 781 + enum { 782 + MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 783 + MLX5_VIRTIO_NET_F_HOST_ECN = 4, 784 + MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 785 + MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 786 + MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 787 + MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 788 + MLX5_VIRTIO_NET_F_CSUM = 10, 789 + MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 790 + MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 791 + }; 792 + 793 + static u16 get_features(u64 features) 782 794 { 783 - return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) | 784 - (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) | 785 - (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) | 786 - (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); 795 + return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 796 + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 797 + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 798 + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 799 + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 800 + (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 801 + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 802 + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 787 803 } 788 804 789 805 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) ··· 813 797 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 814 798 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 815 799 void *obj_context; 800 + u16 mlx_features; 816 801 void *cmd_hdr; 817 802 void *vq_ctx; 818 803 void *in; ··· 829 812 goto err_alloc; 830 813 } 831 814 815 + mlx_features = get_features(ndev->mvdev.actual_features); 832 816 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, 
general_obj_in_cmd_hdr); 833 817 834 818 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); ··· 840 822 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 841 823 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 842 824 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 843 - get_features_12_3(ndev->mvdev.actual_features)); 825 + mlx_features >> 3); 826 + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 827 + mlx_features & 7); 844 828 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 845 829 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 846 830 ··· 2191 2171 return MLX5_VDPA_DATAVQ_GROUP; 2192 2172 } 2193 2173 2194 - enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, 2195 - MLX5_VIRTIO_NET_F_CSUM = 1 << 10, 2196 - MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, 2197 - MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12, 2198 - }; 2199 - 2200 2174 static u64 mlx_to_vritio_features(u16 dev_features) 2201 2175 { 2202 2176 u64 result = 0; 2203 2177 2204 - if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM) 2178 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2179 + result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2180 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2181 + result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2182 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2183 + result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2184 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2185 + result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2186 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2187 + result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2188 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2205 2189 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2206 - if (dev_features & MLX5_VIRTIO_NET_F_CSUM) 2190 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2207 2191 result |= 
BIT_ULL(VIRTIO_NET_F_CSUM); 2208 - if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6) 2192 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2209 2193 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2210 - if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4) 2194 + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2211 2195 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2212 2196 2213 2197 return result; ··· 2320 2296 /* Two data virtqueues only: one for rx and one for tx */ 2321 2297 mvdev->max_idx = 1; 2322 2298 } 2299 + } 2300 + 2301 + static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2302 + { 2303 + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2304 + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2305 + int err; 2306 + 2307 + MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2308 + MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2309 + MLX5_SET(query_vport_state_in, in, vport_number, vport); 2310 + if (vport) 2311 + MLX5_SET(query_vport_state_in, in, other_vport, 1); 2312 + 2313 + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2314 + if (err) 2315 + return 0; 2316 + 2317 + return MLX5_GET(query_vport_state_out, out, state); 2318 + } 2319 + 2320 + static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2321 + { 2322 + if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2323 + VPORT_STATE_UP) 2324 + return true; 2325 + 2326 + return false; 2327 + } 2328 + 2329 + static void update_carrier(struct work_struct *work) 2330 + { 2331 + struct mlx5_vdpa_wq_ent *wqent; 2332 + struct mlx5_vdpa_dev *mvdev; 2333 + struct mlx5_vdpa_net *ndev; 2334 + 2335 + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2336 + mvdev = wqent->mvdev; 2337 + ndev = to_mlx5_vdpa_ndev(mvdev); 2338 + if (get_link_state(mvdev)) 2339 + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2340 + else 2341 + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, 
~VIRTIO_NET_S_LINK_UP); 2342 + 2343 + if (ndev->config_cb.callback) 2344 + ndev->config_cb.callback(ndev->config_cb.private); 2345 + 2346 + kfree(wqent); 2347 + } 2348 + 2349 + static int queue_link_work(struct mlx5_vdpa_net *ndev) 2350 + { 2351 + struct mlx5_vdpa_wq_ent *wqent; 2352 + 2353 + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2354 + if (!wqent) 2355 + return -ENOMEM; 2356 + 2357 + wqent->mvdev = &ndev->mvdev; 2358 + INIT_WORK(&wqent->work, update_carrier); 2359 + queue_work(ndev->mvdev.wq, &wqent->work); 2360 + return 0; 2361 + } 2362 + 2363 + static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2364 + { 2365 + struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2366 + struct mlx5_eqe *eqe = param; 2367 + int ret = NOTIFY_DONE; 2368 + 2369 + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2370 + switch (eqe->sub_type) { 2371 + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2372 + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2373 + if (queue_link_work(ndev)) 2374 + return NOTIFY_DONE; 2375 + 2376 + ret = NOTIFY_OK; 2377 + break; 2378 + default: 2379 + return NOTIFY_DONE; 2380 + } 2381 + return ret; 2382 + } 2383 + return ret; 2384 + } 2385 + 2386 + static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2387 + { 2388 + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2389 + return; 2390 + 2391 + ndev->nb.notifier_call = event_handler; 2392 + mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2393 + ndev->nb_registered = true; 2394 + queue_link_work(ndev); 2395 + } 2396 + 2397 + static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2398 + { 2399 + if (!ndev->nb_registered) 2400 + return; 2401 + 2402 + ndev->nb_registered = false; 2403 + mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2404 + if (ndev->mvdev.wq) 2405 + flush_workqueue(ndev->mvdev.wq); 2323 2406 } 2324 2407 2325 2408 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) ··· 2698 2567 
mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2699 2568 goto err_setup; 2700 2569 } 2570 + register_link_notifier(ndev); 2701 2571 err = setup_driver(mvdev); 2702 2572 if (err) { 2703 2573 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2704 - goto err_setup; 2574 + goto err_driver; 2705 2575 } 2706 2576 } else { 2707 2577 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); ··· 2714 2582 up_write(&ndev->reslock); 2715 2583 return; 2716 2584 2585 + err_driver: 2586 + unregister_link_notifier(ndev); 2717 2587 err_setup: 2718 2588 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2719 2589 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; ··· 2741 2607 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2742 2608 2743 2609 down_write(&ndev->reslock); 2610 + unregister_link_notifier(ndev); 2744 2611 teardown_driver(ndev); 2745 2612 clear_vqs_ready(ndev); 2746 2613 mlx5_vdpa_destroy_mr(&ndev->mvdev); ··· 2996 2861 mlx5_vdpa_info(mvdev, "suspending device\n"); 2997 2862 2998 2863 down_write(&ndev->reslock); 2999 - ndev->nb_registered = false; 3000 - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); 3001 - flush_workqueue(ndev->mvdev.wq); 2864 + unregister_link_notifier(ndev); 3002 2865 for (i = 0; i < ndev->cur_num_vqs; i++) { 3003 2866 mvq = &ndev->vqs[i]; 3004 2867 suspend_vq(ndev, mvq); ··· 3133 3000 struct mlx5_vdpa_net *ndev; 3134 3001 }; 3135 3002 3136 - static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 3137 - { 3138 - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 3139 - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 3140 - int err; 3141 - 3142 - MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 3143 - MLX5_SET(query_vport_state_in, in, op_mod, opmod); 3144 - MLX5_SET(query_vport_state_in, in, vport_number, vport); 3145 - if (vport) 3146 - MLX5_SET(query_vport_state_in, in, other_vport, 1); 3147 - 3148 - err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 3149 - if (err) 3150 - 
return 0; 3151 - 3152 - return MLX5_GET(query_vport_state_out, out, state); 3153 - } 3154 - 3155 - static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 3156 - { 3157 - if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 3158 - VPORT_STATE_UP) 3159 - return true; 3160 - 3161 - return false; 3162 - } 3163 - 3164 - static void update_carrier(struct work_struct *work) 3165 - { 3166 - struct mlx5_vdpa_wq_ent *wqent; 3167 - struct mlx5_vdpa_dev *mvdev; 3168 - struct mlx5_vdpa_net *ndev; 3169 - 3170 - wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 3171 - mvdev = wqent->mvdev; 3172 - ndev = to_mlx5_vdpa_ndev(mvdev); 3173 - if (get_link_state(mvdev)) 3174 - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3175 - else 3176 - ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3177 - 3178 - if (ndev->nb_registered && ndev->config_cb.callback) 3179 - ndev->config_cb.callback(ndev->config_cb.private); 3180 - 3181 - kfree(wqent); 3182 - } 3183 - 3184 - static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 3185 - { 3186 - struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 3187 - struct mlx5_eqe *eqe = param; 3188 - int ret = NOTIFY_DONE; 3189 - struct mlx5_vdpa_wq_ent *wqent; 3190 - 3191 - if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 3192 - if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 3193 - return NOTIFY_DONE; 3194 - switch (eqe->sub_type) { 3195 - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 3196 - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 3197 - wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 3198 - if (!wqent) 3199 - return NOTIFY_DONE; 3200 - 3201 - wqent->mvdev = &ndev->mvdev; 3202 - INIT_WORK(&wqent->work, update_carrier); 3203 - queue_work(ndev->mvdev.wq, &wqent->work); 3204 - ret = NOTIFY_OK; 3205 - break; 3206 - default: 3207 - return NOTIFY_DONE; 3208 - } 3209 - return ret; 3210 - } 3211 - return ret; 3212 - } 3213 - 3214 3003 
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3215 3004 { 3216 3005 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); ··· 3182 3127 return -EINVAL; 3183 3128 } 3184 3129 device_features &= add_config->device_features; 3130 + } else { 3131 + device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3185 3132 } 3186 3133 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3187 3134 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { ··· 3315 3258 goto err_res2; 3316 3259 } 3317 3260 3318 - ndev->nb.notifier_call = event_handler; 3319 - mlx5_notifier_register(mdev, &ndev->nb); 3320 - ndev->nb_registered = true; 3321 3261 mvdev->vdev.mdev = &mgtdev->mgtdev; 3322 3262 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3323 3263 if (err) ··· 3348 3294 3349 3295 mlx5_vdpa_remove_debugfs(ndev->debugfs); 3350 3296 ndev->debugfs = NULL; 3351 - if (ndev->nb_registered) { 3352 - ndev->nb_registered = false; 3353 - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); 3354 - } 3297 + unregister_link_notifier(ndev); 3355 3298 wq = mvdev->wq; 3356 3299 mvdev->wq = NULL; 3357 3300 destroy_workqueue(wq);

+1

drivers/vdpa/solidrun/Makefile

··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o 3 3 snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o 4 + snet_vdpa-$(CONFIG_SNET_VDPA) += snet_ctrl.o 4 5 ifdef CONFIG_HWMON 5 6 snet_vdpa-$(CONFIG_SNET_VDPA) += snet_hwmon.o 6 7 endif

+330

drivers/vdpa/solidrun/snet_ctrl.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * SolidRun DPU driver for control plane 4 + * 5 + * Copyright (C) 2022-2023 SolidRun 6 + * 7 + * Author: Alvaro Karsz <alvaro.karsz@solid-run.com> 8 + * 9 + */ 10 + 11 + #include <linux/iopoll.h> 12 + 13 + #include "snet_vdpa.h" 14 + 15 + enum snet_ctrl_opcodes { 16 + SNET_CTRL_OP_DESTROY = 1, 17 + SNET_CTRL_OP_READ_VQ_STATE, 18 + SNET_CTRL_OP_SUSPEND, 19 + }; 20 + 21 + #define SNET_CTRL_TIMEOUT 2000000 22 + 23 + #define SNET_CTRL_DATA_SIZE_MASK 0x0000FFFF 24 + #define SNET_CTRL_IN_PROCESS_MASK 0x00010000 25 + #define SNET_CTRL_CHUNK_RDY_MASK 0x00020000 26 + #define SNET_CTRL_ERROR_MASK 0x0FFC0000 27 + 28 + #define SNET_VAL_TO_ERR(val) (-(((val) & SNET_CTRL_ERROR_MASK) >> 18)) 29 + #define SNET_EMPTY_CTRL(val) (((val) & SNET_CTRL_ERROR_MASK) || \ 30 + !((val) & SNET_CTRL_IN_PROCESS_MASK)) 31 + #define SNET_DATA_READY(val) ((val) & (SNET_CTRL_ERROR_MASK | SNET_CTRL_CHUNK_RDY_MASK)) 32 + 33 + /* Control register used to read data from the DPU */ 34 + struct snet_ctrl_reg_ctrl { 35 + /* Chunk size in 4B words */ 36 + u16 data_size; 37 + /* We are in the middle of a command */ 38 + u16 in_process:1; 39 + /* A data chunk is ready and can be consumed */ 40 + u16 chunk_ready:1; 41 + /* Error code */ 42 + u16 error:10; 43 + /* Saved for future usage */ 44 + u16 rsvd:4; 45 + }; 46 + 47 + /* Opcode register */ 48 + struct snet_ctrl_reg_op { 49 + u16 opcode; 50 + /* Only if VQ index is relevant for the command */ 51 + u16 vq_idx; 52 + }; 53 + 54 + struct snet_ctrl_regs { 55 + struct snet_ctrl_reg_op op; 56 + struct snet_ctrl_reg_ctrl ctrl; 57 + u32 rsvd; 58 + u32 data[]; 59 + }; 60 + 61 + static struct snet_ctrl_regs __iomem *snet_get_ctrl(struct snet *snet) 62 + { 63 + return snet->bar + snet->psnet->cfg.ctrl_off; 64 + } 65 + 66 + static int snet_wait_for_empty_ctrl(struct snet_ctrl_regs __iomem *regs) 67 + { 68 + u32 val; 69 + 70 + return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_EMPTY_CTRL(val), 10, 71 + 
SNET_CTRL_TIMEOUT); 72 + } 73 + 74 + static int snet_wait_for_empty_op(struct snet_ctrl_regs __iomem *regs) 75 + { 76 + u32 val; 77 + 78 + return readx_poll_timeout(ioread32, &regs->op, val, !val, 10, SNET_CTRL_TIMEOUT); 79 + } 80 + 81 + static int snet_wait_for_data(struct snet_ctrl_regs __iomem *regs) 82 + { 83 + u32 val; 84 + 85 + return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_DATA_READY(val), 10, 86 + SNET_CTRL_TIMEOUT); 87 + } 88 + 89 + static u32 snet_read32_word(struct snet_ctrl_regs __iomem *ctrl_regs, u16 word_idx) 90 + { 91 + return ioread32(&ctrl_regs->data[word_idx]); 92 + } 93 + 94 + static u32 snet_read_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs) 95 + { 96 + return ioread32(&ctrl_regs->ctrl); 97 + } 98 + 99 + static void snet_write_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val) 100 + { 101 + iowrite32(val, &ctrl_regs->ctrl); 102 + } 103 + 104 + static void snet_write_op(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val) 105 + { 106 + iowrite32(val, &ctrl_regs->op); 107 + } 108 + 109 + static int snet_wait_for_dpu_completion(struct snet_ctrl_regs __iomem *ctrl_regs) 110 + { 111 + /* Wait until the DPU finishes completely. 112 + * It will clear the opcode register. 113 + */ 114 + return snet_wait_for_empty_op(ctrl_regs); 115 + } 116 + 117 + /* Reading ctrl from the DPU: 118 + * buf_size must be 4B aligned 119 + * 120 + * Steps: 121 + * 122 + * (1) Verify that the DPU is not in the middle of another operation by 123 + * reading the in_process and error bits in the control register. 124 + * (2) Write the request opcode and the VQ idx in the opcode register 125 + * and write the buffer size in the control register. 126 + * (3) Start reading chunks of data, chunk_ready bit indicates that a 127 + * data chunk is available, we signal that we read the data by clearing the bit. 128 + * (4) Detect that the transfer is completed when the in_process bit 129 + * in the control register is cleared or when an error appears. 
130 + */ 131 + static int snet_ctrl_read_from_dpu(struct snet *snet, u16 opcode, u16 vq_idx, void *buffer, 132 + u32 buf_size) 133 + { 134 + struct pci_dev *pdev = snet->pdev; 135 + struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet); 136 + u32 *bfr_ptr = (u32 *)buffer; 137 + u32 val; 138 + u16 buf_words; 139 + int ret; 140 + u16 words, i, tot_words = 0; 141 + 142 + /* Supported for config 2+ */ 143 + if (!SNET_CFG_VER(snet, 2)) 144 + return -EOPNOTSUPP; 145 + 146 + if (!IS_ALIGNED(buf_size, 4)) 147 + return -EINVAL; 148 + 149 + mutex_lock(&snet->ctrl_lock); 150 + 151 + buf_words = buf_size / 4; 152 + 153 + /* Make sure control register is empty */ 154 + ret = snet_wait_for_empty_ctrl(regs); 155 + if (ret) { 156 + SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n"); 157 + goto exit; 158 + } 159 + 160 + /* We need to write the buffer size in the control register, and the opcode + vq index in 161 + * the opcode register. 162 + * We use a spinlock to serialize the writes. 163 + */ 164 + spin_lock(&snet->ctrl_spinlock); 165 + 166 + snet_write_ctrl(regs, buf_words); 167 + snet_write_op(regs, opcode | (vq_idx << 16)); 168 + 169 + spin_unlock(&snet->ctrl_spinlock); 170 + 171 + while (buf_words != tot_words) { 172 + ret = snet_wait_for_data(regs); 173 + if (ret) { 174 + SNET_WARN(pdev, "Timeout waiting for control data\n"); 175 + goto exit; 176 + } 177 + 178 + val = snet_read_ctrl(regs); 179 + 180 + /* Error? */ 181 + if (val & SNET_CTRL_ERROR_MASK) { 182 + ret = SNET_VAL_TO_ERR(val); 183 + SNET_WARN(pdev, "Error while reading control data from DPU, err %d\n", ret); 184 + goto exit; 185 + } 186 + 187 + words = min_t(u16, val & SNET_CTRL_DATA_SIZE_MASK, buf_words - tot_words); 188 + 189 + for (i = 0; i < words; i++) { 190 + *bfr_ptr = snet_read32_word(regs, i); 191 + bfr_ptr++; 192 + } 193 + 194 + tot_words += words; 195 + 196 + /* Is the job completed? 
*/ 197 + if (!(val & SNET_CTRL_IN_PROCESS_MASK)) 198 + break; 199 + 200 + /* Clear the chunk ready bit and continue */ 201 + val &= ~SNET_CTRL_CHUNK_RDY_MASK; 202 + snet_write_ctrl(regs, val); 203 + } 204 + 205 + ret = snet_wait_for_dpu_completion(regs); 206 + if (ret) 207 + SNET_WARN(pdev, "Timeout waiting for the DPU to complete a control command\n"); 208 + 209 + exit: 210 + mutex_unlock(&snet->ctrl_lock); 211 + return ret; 212 + } 213 + 214 + /* Send a control message to the DPU using the old mechanism 215 + * used with config version 1. 216 + */ 217 + static int snet_send_ctrl_msg_old(struct snet *snet, u32 opcode) 218 + { 219 + struct pci_dev *pdev = snet->pdev; 220 + struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet); 221 + int ret; 222 + 223 + mutex_lock(&snet->ctrl_lock); 224 + 225 + /* Old mechanism uses just 1 register, the opcode register. 226 + * Make sure that the opcode register is empty, and that the DPU isn't 227 + * processing an old message. 228 + */ 229 + ret = snet_wait_for_empty_op(regs); 230 + if (ret) { 231 + SNET_WARN(pdev, "Timeout waiting for previous control message to be ACKed\n"); 232 + goto exit; 233 + } 234 + 235 + /* Write the message */ 236 + snet_write_op(regs, opcode); 237 + 238 + /* DPU ACKs the message by clearing the opcode register */ 239 + ret = snet_wait_for_empty_op(regs); 240 + if (ret) 241 + SNET_WARN(pdev, "Timeout waiting for a control message to be ACKed\n"); 242 + 243 + exit: 244 + mutex_unlock(&snet->ctrl_lock); 245 + return ret; 246 + } 247 + 248 + /* Send a control message to the DPU. 249 + * A control message is a message without payload. 
250 + */ 251 + static int snet_send_ctrl_msg(struct snet *snet, u16 opcode, u16 vq_idx) 252 + { 253 + struct pci_dev *pdev = snet->pdev; 254 + struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet); 255 + u32 val; 256 + int ret; 257 + 258 + /* If config version is not 2+, use the old mechanism */ 259 + if (!SNET_CFG_VER(snet, 2)) 260 + return snet_send_ctrl_msg_old(snet, opcode); 261 + 262 + mutex_lock(&snet->ctrl_lock); 263 + 264 + /* Make sure control register is empty */ 265 + ret = snet_wait_for_empty_ctrl(regs); 266 + if (ret) { 267 + SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n"); 268 + goto exit; 269 + } 270 + 271 + /* We need to clear the control register and write the opcode + vq index in the opcode 272 + * register. 273 + * We use a spinlock to serialize the writes. 274 + */ 275 + spin_lock(&snet->ctrl_spinlock); 276 + 277 + snet_write_ctrl(regs, 0); 278 + snet_write_op(regs, opcode | (vq_idx << 16)); 279 + 280 + spin_unlock(&snet->ctrl_spinlock); 281 + 282 + /* The DPU ACKs control messages by setting the chunk ready bit 283 + * without data. 
284 + */ 285 + ret = snet_wait_for_data(regs); 286 + if (ret) { 287 + SNET_WARN(pdev, "Timeout waiting for control message to be ACKed\n"); 288 + goto exit; 289 + } 290 + 291 + /* Check for errors */ 292 + val = snet_read_ctrl(regs); 293 + ret = SNET_VAL_TO_ERR(val); 294 + 295 + /* Clear the chunk ready bit */ 296 + val &= ~SNET_CTRL_CHUNK_RDY_MASK; 297 + snet_write_ctrl(regs, val); 298 + 299 + ret = snet_wait_for_dpu_completion(regs); 300 + if (ret) 301 + SNET_WARN(pdev, "Timeout waiting for DPU to complete a control command, err %d\n", 302 + ret); 303 + 304 + exit: 305 + mutex_unlock(&snet->ctrl_lock); 306 + return ret; 307 + } 308 + 309 + void snet_ctrl_clear(struct snet *snet) 310 + { 311 + struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet); 312 + 313 + snet_write_op(regs, 0); 314 + } 315 + 316 + int snet_destroy_dev(struct snet *snet) 317 + { 318 + return snet_send_ctrl_msg(snet, SNET_CTRL_OP_DESTROY, 0); 319 + } 320 + 321 + int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state) 322 + { 323 + return snet_ctrl_read_from_dpu(snet, SNET_CTRL_OP_READ_VQ_STATE, idx, state, 324 + sizeof(*state)); 325 + } 326 + 327 + int snet_suspend_dev(struct snet *snet) 328 + { 329 + return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0); 330 + }

+1 -1

drivers/vdpa/solidrun/snet_hwmon.c

··· 2 2 /* 3 3 * SolidRun DPU driver for control plane 4 4 * 5 - * Copyright (C) 2022 SolidRun 5 + * Copyright (C) 2022-2023 SolidRun 6 6 * 7 7 * Author: Alvaro Karsz <alvaro.karsz@solid-run.com> 8 8 *

+76 -70

drivers/vdpa/solidrun/snet_main.c

··· 2 2 /* 3 3 * SolidRun DPU driver for control plane 4 4 * 5 - * Copyright (C) 2022 SolidRun 5 + * Copyright (C) 2022-2023 SolidRun 6 6 * 7 7 * Author: Alvaro Karsz <alvaro.karsz@solid-run.com> 8 8 * ··· 16 16 /* SNET signature */ 17 17 #define SNET_SIGNATURE 0xD0D06363 18 18 /* Max. config version that we can work with */ 19 - #define SNET_CFG_VERSION 0x1 19 + #define SNET_CFG_VERSION 0x2 20 20 /* Queue align */ 21 21 #define SNET_QUEUE_ALIGNMENT PAGE_SIZE 22 22 /* Kick value to notify that new data is available */ 23 23 #define SNET_KICK_VAL 0x1 24 24 #define SNET_CONFIG_OFF 0x0 25 - /* ACK timeout for a message */ 26 - #define SNET_ACK_TIMEOUT 2000000 27 25 /* How long we are willing to wait for a SNET device */ 28 26 #define SNET_DETECT_TIMEOUT 5000000 29 27 /* How long should we wait for the DPU to read our config */ ··· 30 32 #define SNET_GENERAL_CFG_LEN 36 31 33 #define SNET_GENERAL_CFG_VQ_LEN 40 32 34 33 - enum snet_msg { 34 - SNET_MSG_DESTROY = 1, 35 - }; 36 - 37 35 static struct snet *vdpa_to_snet(struct vdpa_device *vdpa) 38 36 { 39 37 return container_of(vdpa, struct snet, vdpa); 40 - } 41 - 42 - static int snet_wait_for_msg_ack(struct snet *snet) 43 - { 44 - struct pci_dev *pdev = snet->pdev; 45 - int ret; 46 - u32 val; 47 - 48 - /* The DPU will clear the messages offset once messages 49 - * are processed. 50 - */ 51 - ret = readx_poll_timeout(ioread32, snet->bar + snet->psnet->cfg.msg_off, 52 - val, !val, 10, SNET_ACK_TIMEOUT); 53 - if (ret) 54 - SNET_WARN(pdev, "Timeout waiting for message ACK\n"); 55 - 56 - return ret; 57 - } 58 - 59 - /* Sends a message to the DPU. 60 - * If blocking is set, the function will return once the 61 - * message was processed by the DPU (or timeout). 
62 - */ 63 - static int snet_send_msg(struct snet *snet, u32 msg, bool blocking) 64 - { 65 - int ret = 0; 66 - 67 - /* Make sure the DPU acked last message before issuing a new one */ 68 - ret = snet_wait_for_msg_ack(snet); 69 - if (ret) 70 - return ret; 71 - 72 - /* Write the message */ 73 - snet_write32(snet, snet->psnet->cfg.msg_off, msg); 74 - 75 - if (blocking) 76 - ret = snet_wait_for_msg_ack(snet); 77 - else /* If non-blocking, flush the write by issuing a read */ 78 - snet_read32(snet, snet->psnet->cfg.msg_off); 79 - 80 - return ret; 81 38 } 82 39 83 40 static irqreturn_t snet_cfg_irq_hndlr(int irq, void *data) 84 41 { 85 42 struct snet *snet = data; 86 43 /* Call callback if any */ 87 - if (snet->cb.callback) 44 + if (likely(snet->cb.callback)) 88 45 return snet->cb.callback(snet->cb.private); 89 46 90 47 return IRQ_HANDLED; ··· 49 96 { 50 97 struct snet_vq *vq = data; 51 98 /* Call callback if any */ 52 - if (vq->cb.callback) 99 + if (likely(vq->cb.callback)) 53 100 return vq->cb.callback(vq->cb.private); 54 101 55 102 return IRQ_HANDLED; ··· 106 153 { 107 154 struct snet *snet = vdpa_to_snet(vdev); 108 155 /* not ready - ignore */ 109 - if (!snet->vqs[idx]->ready) 156 + if (unlikely(!snet->vqs[idx]->ready)) 110 157 return; 111 158 112 159 iowrite32(SNET_KICK_VAL, snet->vqs[idx]->kick_ptr); 160 + } 161 + 162 + static void snet_kick_vq_with_data(struct vdpa_device *vdev, u32 data) 163 + { 164 + struct snet *snet = vdpa_to_snet(vdev); 165 + u16 idx = data & 0xFFFF; 166 + 167 + /* not ready - ignore */ 168 + if (unlikely(!snet->vqs[idx]->ready)) 169 + return; 170 + 171 + iowrite32((data & 0xFFFF0000) | SNET_KICK_VAL, snet->vqs[idx]->kick_ptr); 113 172 } 114 173 115 174 static void snet_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) ··· 146 181 return snet->vqs[idx]->ready; 147 182 } 148 183 149 - static int snet_set_vq_state(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state) 184 + static bool 
snet_vq_state_is_initial(struct snet *snet, const struct vdpa_vq_state *state) 150 185 { 151 - struct snet *snet = vdpa_to_snet(vdev); 152 - /* Setting the VQ state is not supported. 153 - * If the asked state is the same as the initial one 154 - * we can ignore it. 155 - */ 156 186 if (SNET_HAS_FEATURE(snet, VIRTIO_F_RING_PACKED)) { 157 187 const struct vdpa_vq_state_packed *p = &state->packed; 158 188 159 189 if (p->last_avail_counter == 1 && p->last_used_counter == 1 && 160 190 p->last_avail_idx == 0 && p->last_used_idx == 0) 161 - return 0; 191 + return true; 162 192 } else { 163 193 const struct vdpa_vq_state_split *s = &state->split; 164 194 165 195 if (s->avail_index == 0) 166 - return 0; 196 + return true; 167 197 } 198 + 199 + return false; 200 + } 201 + 202 + static int snet_set_vq_state(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state) 203 + { 204 + struct snet *snet = vdpa_to_snet(vdev); 205 + 206 + /* We can set any state for config version 2+ */ 207 + if (SNET_CFG_VER(snet, 2)) { 208 + memcpy(&snet->vqs[idx]->vq_state, state, sizeof(*state)); 209 + return 0; 210 + } 211 + 212 + /* Older config - we can't set the VQ state. 213 + * Return 0 only if this is the initial state we use in the DPU. 
214 + */ 215 + if (snet_vq_state_is_initial(snet, state)) 216 + return 0; 168 217 169 218 return -EOPNOTSUPP; 170 219 } 171 220 172 221 static int snet_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 173 222 { 174 - /* Not supported */ 175 - return -EOPNOTSUPP; 223 + struct snet *snet = vdpa_to_snet(vdev); 224 + 225 + return snet_read_vq_state(snet, idx, state); 176 226 } 177 227 178 228 static int snet_get_vq_irq(struct vdpa_device *vdev, u16 idx) ··· 212 232 if (!snet->status) 213 233 return 0; 214 234 215 - /* If DPU started, send a destroy message */ 235 + /* If DPU started, destroy it */ 216 236 if (snet->status & VIRTIO_CONFIG_S_DRIVER_OK) 217 - ret = snet_send_msg(snet, SNET_MSG_DESTROY, true); 237 + ret = snet_destroy_dev(snet); 218 238 219 239 /* Clear VQs */ 220 240 for (i = 0; i < snet->cfg->vq_num; i++) { ··· 238 258 snet->dpu_ready = false; 239 259 240 260 if (ret) 241 - SNET_WARN(pdev, "Incomplete reset to SNET[%u] device\n", snet->sid); 261 + SNET_WARN(pdev, "Incomplete reset to SNET[%u] device, err: %d\n", snet->sid, ret); 242 262 else 243 263 SNET_DBG(pdev, "Reset SNET[%u] device\n", snet->sid); 244 264 ··· 336 356 * | DESC AREA | 337 357 * | DEVICE AREA | 338 358 * | DRIVER AREA | 339 - * | RESERVED | 359 + * | VQ STATE (CFG 2+) | RSVD | 340 360 * 341 361 * Magic number should be written last, this is the DPU indication that the data is ready 342 362 */ ··· 371 391 off += 8; 372 392 snet_write64(snet, off, snet->vqs[i]->driver_area); 373 393 off += 8; 394 + /* Write VQ state if config version is 2+ */ 395 + if (SNET_CFG_VER(snet, 2)) 396 + snet_write32(snet, off, *(u32 *)&snet->vqs[i]->vq_state); 397 + off += 4; 398 + 374 399 /* Ignore reserved */ 375 - off += 8; 400 + off += 4; 376 401 } 377 402 378 - /* Clear snet messages address for this device */ 379 - snet_write32(snet, snet->psnet->cfg.msg_off, 0); 380 403 /* Write magic number - data is ready */ 381 404 snet_write32(snet, snet->psnet->cfg.host_cfg_off, 
SNET_SIGNATURE); 382 405 ··· 495 512 iowrite8(*buf_ptr++, cfg_ptr + i); 496 513 } 497 514 515 + static int snet_suspend(struct vdpa_device *vdev) 516 + { 517 + struct snet *snet = vdpa_to_snet(vdev); 518 + int ret; 519 + 520 + ret = snet_suspend_dev(snet); 521 + if (ret) 522 + SNET_ERR(snet->pdev, "SNET[%u] suspend failed, err: %d\n", snet->sid, ret); 523 + else 524 + SNET_DBG(snet->pdev, "Suspend SNET[%u] device\n", snet->sid); 525 + 526 + return ret; 527 + } 528 + 498 529 static const struct vdpa_config_ops snet_config_ops = { 499 530 .set_vq_address = snet_set_vq_address, 500 531 .set_vq_num = snet_set_vq_num, 501 532 .kick_vq = snet_kick_vq, 533 + .kick_vq_with_data = snet_kick_vq_with_data, 502 534 .set_vq_cb = snet_set_vq_cb, 503 535 .set_vq_ready = snet_set_vq_ready, 504 536 .get_vq_ready = snet_get_vq_ready, ··· 535 537 .set_status = snet_set_status, 536 538 .get_config = snet_get_config, 537 539 .set_config = snet_set_config, 540 + .suspend = snet_suspend, 538 541 }; 539 542 540 543 static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) ··· 696 697 off += 4; 697 698 cfg->hwmon_off = psnet_read32(psnet, off); 698 699 off += 4; 699 - cfg->msg_off = psnet_read32(psnet, off); 700 + cfg->ctrl_off = psnet_read32(psnet, off); 700 701 off += 4; 701 702 cfg->flags = psnet_read32(psnet, off); 702 703 off += 4; ··· 996 997 goto free_irqs; 997 998 } 998 999 1000 + /* Init control mutex and spinlock */ 1001 + mutex_init(&snet->ctrl_lock); 1002 + spin_lock_init(&snet->ctrl_spinlock); 1003 + 999 1004 /* Save pci device pointer */ 1000 1005 snet->pdev = pdev; 1001 1006 snet->psnet = psnet; ··· 1015 1012 1016 1013 /* Create a VirtIO config pointer */ 1017 1014 snet->cfg->virtio_cfg = snet->bar + snet->psnet->cfg.virtio_cfg_off; 1015 + 1016 + /* Clear control registers */ 1017 + snet_ctrl_clear(snet); 1018 1018 1019 1019 pci_set_master(pdev); 1020 1020 pci_set_drvdata(pdev, snet);

+17 -3

drivers/vdpa/solidrun/snet_vdpa.h

··· 2 2 /* 3 3 * SolidRun DPU driver for control plane 4 4 * 5 - * Copyright (C) 2022 SolidRun 5 + * Copyright (C) 2022-2023 SolidRun 6 6 * 7 7 * Author: Alvaro Karsz <alvaro.karsz@solid-run.com> 8 8 * ··· 20 20 #define SNET_INFO(pdev, fmt, ...) dev_info(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__) 21 21 #define SNET_DBG(pdev, fmt, ...) dev_dbg(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__) 22 22 #define SNET_HAS_FEATURE(s, f) ((s)->negotiated_features & BIT_ULL(f)) 23 + /* Check if negotiated config version is at least @ver */ 24 + #define SNET_CFG_VER(snet, ver) ((snet)->psnet->negotiated_cfg_ver >= (ver)) 25 + 23 26 /* VQ struct */ 24 27 struct snet_vq { 25 28 /* VQ callback */ 26 29 struct vdpa_callback cb; 30 + /* VQ state received from bus */ 31 + struct vdpa_vq_state vq_state; 27 32 /* desc base address */ 28 33 u64 desc_area; 29 34 /* device base address */ ··· 56 51 struct vdpa_device vdpa; 57 52 /* Config callback */ 58 53 struct vdpa_callback cb; 54 + /* To lock the control mechanism */ 55 + struct mutex ctrl_lock; 56 + /* Spinlock to protect critical parts in the control mechanism */ 57 + spinlock_t ctrl_spinlock; 59 58 /* array of virqueues */ 60 59 struct snet_vq **vqs; 61 60 /* Used features */ ··· 126 117 u32 kick_off; 127 118 /* Offset in PCI BAR for HW monitoring */ 128 119 u32 hwmon_off; 129 - /* Offset in PCI BAR for SNET messages */ 130 - u32 msg_off; 120 + /* Offset in PCI BAR for Control mechanism */ 121 + u32 ctrl_off; 131 122 /* Config general flags - enum snet_cfg_flags */ 132 123 u32 flags; 133 124 /* Reserved for future usage */ ··· 199 190 #if IS_ENABLED(CONFIG_HWMON) 200 191 void psnet_create_hwmon(struct pci_dev *pdev); 201 192 #endif 193 + 194 + void snet_ctrl_clear(struct snet *snet); 195 + int snet_destroy_dev(struct snet *snet); 196 + int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state); 197 + int snet_suspend_dev(struct snet *snet); 202 198 203 199 #endif //_SNET_VDPA_H_

+132 -34

drivers/vdpa/vdpa_sim/vdpa_sim.c

··· 11 11 #include <linux/module.h> 12 12 #include <linux/device.h> 13 13 #include <linux/kernel.h> 14 + #include <linux/kthread.h> 14 15 #include <linux/slab.h> 15 - #include <linux/sched.h> 16 16 #include <linux/dma-map-ops.h> 17 17 #include <linux/vringh.h> 18 18 #include <linux/vdpa.h> ··· 35 35 MODULE_PARM_DESC(max_iotlb_entries, 36 36 "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); 37 37 38 + static bool use_va = true; 39 + module_param(use_va, bool, 0444); 40 + MODULE_PARM_DESC(use_va, "Enable/disable the device's ability to use VA"); 41 + 38 42 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE 39 43 #define VDPASIM_QUEUE_MAX 256 40 44 #define VDPASIM_VENDOR_ID 0 45 + 46 + struct vdpasim_mm_work { 47 + struct kthread_work work; 48 + struct vdpasim *vdpasim; 49 + struct mm_struct *mm_to_bind; 50 + int ret; 51 + }; 52 + 53 + static void vdpasim_mm_work_fn(struct kthread_work *work) 54 + { 55 + struct vdpasim_mm_work *mm_work = 56 + container_of(work, struct vdpasim_mm_work, work); 57 + struct vdpasim *vdpasim = mm_work->vdpasim; 58 + 59 + mm_work->ret = 0; 60 + 61 + //TODO: should we attach the cgroup of the mm owner? 
62 + vdpasim->mm_bound = mm_work->mm_to_bind; 63 + } 64 + 65 + static void vdpasim_worker_change_mm_sync(struct vdpasim *vdpasim, 66 + struct vdpasim_mm_work *mm_work) 67 + { 68 + struct kthread_work *work = &mm_work->work; 69 + 70 + kthread_init_work(work, vdpasim_mm_work_fn); 71 + kthread_queue_work(vdpasim->worker, work); 72 + 73 + kthread_flush_work(work); 74 + } 41 75 42 76 static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) 43 77 { ··· 93 59 { 94 60 struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 95 61 uint16_t last_avail_idx = vq->vring.last_avail_idx; 62 + struct vring_desc *desc = (struct vring_desc *) 63 + (uintptr_t)vq->desc_addr; 64 + struct vring_avail *avail = (struct vring_avail *) 65 + (uintptr_t)vq->driver_addr; 66 + struct vring_used *used = (struct vring_used *) 67 + (uintptr_t)vq->device_addr; 96 68 97 - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true, 98 - (struct vring_desc *)(uintptr_t)vq->desc_addr, 99 - (struct vring_avail *) 100 - (uintptr_t)vq->driver_addr, 101 - (struct vring_used *) 102 - (uintptr_t)vq->device_addr); 69 + if (use_va && vdpasim->mm_bound) { 70 + vringh_init_iotlb_va(&vq->vring, vdpasim->features, vq->num, 71 + true, desc, avail, used); 72 + } else { 73 + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, 74 + true, desc, avail, used); 75 + } 103 76 104 77 vq->vring.last_avail_idx = last_avail_idx; 105 78 ··· 168 127 static const struct vdpa_config_ops vdpasim_config_ops; 169 128 static const struct vdpa_config_ops vdpasim_batch_config_ops; 170 129 130 + static void vdpasim_work_fn(struct kthread_work *work) 131 + { 132 + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); 133 + struct mm_struct *mm = vdpasim->mm_bound; 134 + 135 + if (use_va && mm) { 136 + if (!mmget_not_zero(mm)) 137 + return; 138 + kthread_use_mm(mm); 139 + } 140 + 141 + vdpasim->dev_attr.work_fn(vdpasim); 142 + 143 + if (use_va && mm) { 144 + kthread_unuse_mm(mm); 145 + mmput(mm); 146 + } 147 + } 148 + 
171 149 struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, 172 150 const struct vdpa_dev_set_config *config) 173 151 { ··· 215 155 vdpa = __vdpa_alloc_device(NULL, ops, 216 156 dev_attr->ngroups, dev_attr->nas, 217 157 dev_attr->alloc_size, 218 - dev_attr->name, false); 158 + dev_attr->name, use_va); 219 159 if (IS_ERR(vdpa)) { 220 160 ret = PTR_ERR(vdpa); 221 161 goto err_alloc; ··· 223 163 224 164 vdpasim = vdpa_to_sim(vdpa); 225 165 vdpasim->dev_attr = *dev_attr; 226 - INIT_WORK(&vdpasim->work, dev_attr->work_fn); 227 - spin_lock_init(&vdpasim->lock); 166 + dev = &vdpasim->vdpa.dev; 167 + 168 + kthread_init_work(&vdpasim->work, vdpasim_work_fn); 169 + vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s", 170 + dev_attr->name); 171 + if (IS_ERR(vdpasim->worker)) 172 + goto err_iommu; 173 + 174 + mutex_init(&vdpasim->mutex); 228 175 spin_lock_init(&vdpasim->iommu_lock); 229 176 230 - dev = &vdpasim->vdpa.dev; 231 177 dev->dma_mask = &dev->coherent_dma_mask; 232 178 if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) 233 179 goto err_iommu; ··· 261 195 for (i = 0; i < vdpasim->dev_attr.nas; i++) 262 196 vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0); 263 197 264 - vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); 265 - if (!vdpasim->buffer) 266 - goto err_iommu; 267 - 268 198 for (i = 0; i < dev_attr->nvqs; i++) 269 199 vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], 270 200 &vdpasim->iommu_lock); ··· 275 213 return ERR_PTR(ret); 276 214 } 277 215 EXPORT_SYMBOL_GPL(vdpasim_create); 216 + 217 + void vdpasim_schedule_work(struct vdpasim *vdpasim) 218 + { 219 + kthread_queue_work(vdpasim->worker, &vdpasim->work); 220 + } 221 + EXPORT_SYMBOL_GPL(vdpasim_schedule_work); 278 222 279 223 static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, 280 224 u64 desc_area, u64 driver_area, ··· 316 248 } 317 249 318 250 if (vq->ready) 319 - schedule_work(&vdpasim->work); 251 + 
vdpasim_schedule_work(vdpasim); 320 252 } 321 253 322 254 static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, ··· 335 267 struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 336 268 bool old_ready; 337 269 338 - spin_lock(&vdpasim->lock); 270 + mutex_lock(&vdpasim->mutex); 339 271 old_ready = vq->ready; 340 272 vq->ready = ready; 341 273 if (vq->ready && !old_ready) { 342 274 vdpasim_queue_ready(vdpasim, idx); 343 275 } 344 - spin_unlock(&vdpasim->lock); 276 + mutex_unlock(&vdpasim->mutex); 345 277 } 346 278 347 279 static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) ··· 359 291 struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 360 292 struct vringh *vrh = &vq->vring; 361 293 362 - spin_lock(&vdpasim->lock); 294 + mutex_lock(&vdpasim->mutex); 363 295 vrh->last_avail_idx = state->split.avail_index; 364 - spin_unlock(&vdpasim->lock); 296 + mutex_unlock(&vdpasim->mutex); 365 297 366 298 return 0; 367 299 } ··· 458 390 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 459 391 u8 status; 460 392 461 - spin_lock(&vdpasim->lock); 393 + mutex_lock(&vdpasim->mutex); 462 394 status = vdpasim->status; 463 - spin_unlock(&vdpasim->lock); 395 + mutex_unlock(&vdpasim->mutex); 464 396 465 397 return status; 466 398 } ··· 469 401 { 470 402 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 471 403 472 - spin_lock(&vdpasim->lock); 404 + mutex_lock(&vdpasim->mutex); 473 405 vdpasim->status = status; 474 - spin_unlock(&vdpasim->lock); 406 + mutex_unlock(&vdpasim->mutex); 475 407 } 476 408 477 409 static int vdpasim_reset(struct vdpa_device *vdpa) 478 410 { 479 411 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 480 412 481 - spin_lock(&vdpasim->lock); 413 + mutex_lock(&vdpasim->mutex); 482 414 vdpasim->status = 0; 483 415 vdpasim_do_reset(vdpasim); 484 - spin_unlock(&vdpasim->lock); 416 + mutex_unlock(&vdpasim->mutex); 485 417 486 418 return 0; 487 419 } ··· 490 422 { 491 423 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 492 424 493 - spin_lock(&vdpasim->lock); 425 + 
mutex_lock(&vdpasim->mutex); 494 426 vdpasim->running = false; 495 - spin_unlock(&vdpasim->lock); 427 + mutex_unlock(&vdpasim->mutex); 496 428 497 429 return 0; 498 430 } ··· 502 434 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 503 435 int i; 504 436 505 - spin_lock(&vdpasim->lock); 437 + mutex_lock(&vdpasim->mutex); 506 438 vdpasim->running = true; 507 439 508 440 if (vdpasim->pending_kick) { ··· 513 445 vdpasim->pending_kick = false; 514 446 } 515 447 516 - spin_unlock(&vdpasim->lock); 448 + mutex_unlock(&vdpasim->mutex); 517 449 518 450 return 0; 519 451 } ··· 585 517 586 518 iommu = &vdpasim->iommu[asid]; 587 519 588 - spin_lock(&vdpasim->lock); 520 + mutex_lock(&vdpasim->mutex); 589 521 590 522 for (i = 0; i < vdpasim->dev_attr.nvqs; i++) 591 523 if (vdpasim_get_vq_group(vdpa, i) == group) 592 524 vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu, 593 525 &vdpasim->iommu_lock); 594 526 595 - spin_unlock(&vdpasim->lock); 527 + mutex_unlock(&vdpasim->mutex); 596 528 597 529 return 0; 598 530 } ··· 629 561 vhost_iotlb_reset(iommu); 630 562 spin_unlock(&vdpasim->iommu_lock); 631 563 return ret; 564 + } 565 + 566 + static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm) 567 + { 568 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 569 + struct vdpasim_mm_work mm_work; 570 + 571 + mm_work.vdpasim = vdpasim; 572 + mm_work.mm_to_bind = mm; 573 + 574 + vdpasim_worker_change_mm_sync(vdpasim, &mm_work); 575 + 576 + return mm_work.ret; 577 + } 578 + 579 + static void vdpasim_unbind_mm(struct vdpa_device *vdpa) 580 + { 581 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 582 + struct vdpasim_mm_work mm_work; 583 + 584 + mm_work.vdpasim = vdpasim; 585 + mm_work.mm_to_bind = NULL; 586 + 587 + vdpasim_worker_change_mm_sync(vdpasim, &mm_work); 632 588 } 633 589 634 590 static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, ··· 702 610 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 703 611 int i; 704 612 705 - cancel_work_sync(&vdpasim->work); 613 + 
kthread_cancel_work_sync(&vdpasim->work); 614 + kthread_destroy_worker(vdpasim->worker); 706 615 707 616 for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { 708 617 vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov); 709 618 vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); 710 619 } 711 620 712 - kvfree(vdpasim->buffer); 621 + vdpasim->dev_attr.free(vdpasim); 622 + 713 623 for (i = 0; i < vdpasim->dev_attr.nas; i++) 714 624 vhost_iotlb_reset(&vdpasim->iommu[i]); 715 625 kfree(vdpasim->iommu); ··· 752 658 .set_group_asid = vdpasim_set_group_asid, 753 659 .dma_map = vdpasim_dma_map, 754 660 .dma_unmap = vdpasim_dma_unmap, 661 + .bind_mm = vdpasim_bind_mm, 662 + .unbind_mm = vdpasim_unbind_mm, 755 663 .free = vdpasim_free, 756 664 }; 757 665 ··· 788 692 .get_iova_range = vdpasim_get_iova_range, 789 693 .set_group_asid = vdpasim_set_group_asid, 790 694 .set_map = vdpasim_set_map, 695 + .bind_mm = vdpasim_bind_mm, 696 + .unbind_mm = vdpasim_unbind_mm, 791 697 .free = vdpasim_free, 792 698 }; 793 699

+8 -6

drivers/vdpa/vdpa_sim/vdpa_sim.h

··· 39 39 u64 supported_features; 40 40 size_t alloc_size; 41 41 size_t config_size; 42 - size_t buffer_size; 43 42 int nvqs; 44 43 u32 id; 45 44 u32 ngroups; 46 45 u32 nas; 47 46 48 - work_func_t work_fn; 47 + void (*work_fn)(struct vdpasim *vdpasim); 49 48 void (*get_config)(struct vdpasim *vdpasim, void *config); 50 49 void (*set_config)(struct vdpasim *vdpasim, const void *config); 51 50 int (*get_stats)(struct vdpasim *vdpasim, u16 idx, 52 51 struct sk_buff *msg, 53 52 struct netlink_ext_ack *extack); 53 + void (*free)(struct vdpasim *vdpasim); 54 54 }; 55 55 56 56 /* State of each vdpasim device */ 57 57 struct vdpasim { 58 58 struct vdpa_device vdpa; 59 59 struct vdpasim_virtqueue *vqs; 60 - struct work_struct work; 60 + struct kthread_worker *worker; 61 + struct kthread_work work; 62 + struct mm_struct *mm_bound; 61 63 struct vdpasim_dev_attr dev_attr; 62 - /* spinlock to synchronize virtqueue state */ 63 - spinlock_t lock; 64 + /* mutex to synchronize virtqueue state */ 65 + struct mutex mutex; 64 66 /* virtio config according to device type */ 65 67 void *config; 66 68 struct vhost_iotlb *iommu; 67 69 bool *iommu_pt; 68 - void *buffer; 69 70 u32 status; 70 71 u32 generation; 71 72 u64 features; ··· 79 78 80 79 struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr, 81 80 const struct vdpa_dev_set_config *config); 81 + void vdpasim_schedule_work(struct vdpasim *vdpasim); 82 82 83 83 /* TODO: cross-endian support */ 84 84 static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)

+80 -13

drivers/vdpa/vdpa_sim/vdpa_sim_blk.c

··· 11 11 #include <linux/module.h> 12 12 #include <linux/device.h> 13 13 #include <linux/kernel.h> 14 - #include <linux/sched.h> 15 14 #include <linux/blkdev.h> 16 15 #include <linux/vringh.h> 17 16 #include <linux/vdpa.h> ··· 43 44 #define VDPASIM_BLK_AS_NUM 1 44 45 #define VDPASIM_BLK_GROUP_NUM 1 45 46 47 + struct vdpasim_blk { 48 + struct vdpasim vdpasim; 49 + void *buffer; 50 + bool shared_backend; 51 + }; 52 + 53 + static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim) 54 + { 55 + return container_of(vdpasim, struct vdpasim_blk, vdpasim); 56 + } 57 + 46 58 static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; 59 + 60 + static bool shared_backend; 61 + module_param(shared_backend, bool, 0444); 62 + MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices"); 63 + 64 + static void *shared_buffer; 65 + /* mutex to synchronize shared_buffer access */ 66 + static DEFINE_MUTEX(shared_buffer_mutex); 67 + 68 + static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk) 69 + { 70 + if (blk->shared_backend) 71 + mutex_lock(&shared_buffer_mutex); 72 + } 73 + 74 + static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk) 75 + { 76 + if (blk->shared_backend) 77 + mutex_unlock(&shared_buffer_mutex); 78 + } 47 79 48 80 static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector, 49 81 u64 num_sectors, u64 max_sectors) ··· 109 79 static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, 110 80 struct vdpasim_virtqueue *vq) 111 81 { 82 + struct vdpasim_blk *blk = sim_to_blk(vdpasim); 112 83 size_t pushed = 0, to_pull, to_push; 113 84 struct virtio_blk_outhdr hdr; 114 85 bool handled = false; ··· 175 144 break; 176 145 } 177 146 147 + vdpasim_blk_buffer_lock(blk); 178 148 bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, 179 - vdpasim->buffer + offset, 180 - to_push); 149 + blk->buffer + offset, to_push); 150 + vdpasim_blk_buffer_unlock(blk); 181 151 if (bytes < 0) { 182 152 
dev_dbg(&vdpasim->vdpa.dev, 183 153 "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", ··· 198 166 break; 199 167 } 200 168 169 + vdpasim_blk_buffer_lock(blk); 201 170 bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, 202 - vdpasim->buffer + offset, 203 - to_pull); 171 + blk->buffer + offset, to_pull); 172 + vdpasim_blk_buffer_unlock(blk); 204 173 if (bytes < 0) { 205 174 dev_dbg(&vdpasim->vdpa.dev, 206 175 "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", ··· 281 248 } 282 249 283 250 if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 284 - memset(vdpasim->buffer + offset, 0, 251 + vdpasim_blk_buffer_lock(blk); 252 + memset(blk->buffer + offset, 0, 285 253 num_sectors << SECTOR_SHIFT); 254 + vdpasim_blk_buffer_unlock(blk); 286 255 } 287 256 288 257 break; ··· 321 286 return handled; 322 287 } 323 288 324 - static void vdpasim_blk_work(struct work_struct *work) 289 + static void vdpasim_blk_work(struct vdpasim *vdpasim) 325 290 { 326 - struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); 327 291 bool reschedule = false; 328 292 int i; 329 293 330 - spin_lock(&vdpasim->lock); 294 + mutex_lock(&vdpasim->mutex); 331 295 332 296 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) 333 297 goto out; ··· 357 323 } 358 324 } 359 325 out: 360 - spin_unlock(&vdpasim->lock); 326 + mutex_unlock(&vdpasim->mutex); 361 327 362 328 if (reschedule) 363 - schedule_work(&vdpasim->work); 329 + vdpasim_schedule_work(vdpasim); 364 330 } 365 331 366 332 static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) ··· 389 355 390 356 } 391 357 358 + static void vdpasim_blk_free(struct vdpasim *vdpasim) 359 + { 360 + struct vdpasim_blk *blk = sim_to_blk(vdpasim); 361 + 362 + if (!blk->shared_backend) 363 + kvfree(blk->buffer); 364 + } 365 + 392 366 static void vdpasim_blk_mgmtdev_release(struct device *dev) 393 367 { 394 368 } ··· 410 368 const struct vdpa_dev_set_config *config) 411 369 { 412 370 struct vdpasim_dev_attr dev_attr = {}; 
371 + struct vdpasim_blk *blk; 413 372 struct vdpasim *simdev; 414 373 int ret; 415 374 ··· 421 378 dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; 422 379 dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM; 423 380 dev_attr.nas = VDPASIM_BLK_AS_NUM; 424 - dev_attr.alloc_size = sizeof(struct vdpasim); 381 + dev_attr.alloc_size = sizeof(struct vdpasim_blk); 425 382 dev_attr.config_size = sizeof(struct virtio_blk_config); 426 383 dev_attr.get_config = vdpasim_blk_get_config; 427 384 dev_attr.work_fn = vdpasim_blk_work; 428 - dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT; 385 + dev_attr.free = vdpasim_blk_free; 429 386 430 387 simdev = vdpasim_create(&dev_attr, config); 431 388 if (IS_ERR(simdev)) 432 389 return PTR_ERR(simdev); 390 + 391 + blk = sim_to_blk(simdev); 392 + blk->shared_backend = shared_backend; 393 + 394 + if (blk->shared_backend) { 395 + blk->buffer = shared_buffer; 396 + } else { 397 + blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, 398 + GFP_KERNEL); 399 + if (!blk->buffer) { 400 + ret = -ENOMEM; 401 + goto put_dev; 402 + } 403 + } 433 404 434 405 ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM); 435 406 if (ret) ··· 494 437 if (ret) 495 438 goto parent_err; 496 439 440 + if (shared_backend) { 441 + shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, 442 + GFP_KERNEL); 443 + if (!shared_buffer) { 444 + ret = -ENOMEM; 445 + goto parent_err; 446 + } 447 + } 448 + 497 449 return 0; 498 450 499 451 parent_err: ··· 512 446 513 447 static void __exit vdpasim_blk_exit(void) 514 448 { 449 + kvfree(shared_buffer); 515 450 vdpa_mgmtdev_unregister(&mgmt_dev); 516 451 device_unregister(&vdpasim_blk_mgmtdev); 517 452 }

+25 -13

drivers/vdpa/vdpa_sim/vdpa_sim_net.c

··· 11 11 #include <linux/module.h> 12 12 #include <linux/device.h> 13 13 #include <linux/kernel.h> 14 - #include <linux/sched.h> 15 14 #include <linux/etherdevice.h> 16 15 #include <linux/vringh.h> 17 16 #include <linux/vdpa.h> ··· 58 59 struct vdpasim_dataq_stats tx_stats; 59 60 struct vdpasim_dataq_stats rx_stats; 60 61 struct vdpasim_cq_stats cq_stats; 62 + void *buffer; 61 63 }; 62 64 63 65 static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim) ··· 88 88 size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) : 89 89 sizeof(struct virtio_net_hdr); 90 90 struct virtio_net_config *vio_config = vdpasim->config; 91 + struct vdpasim_net *net = sim_to_net(vdpasim); 91 92 92 93 if (len < ETH_ALEN + hdr_len) 93 94 return false; 94 95 95 - if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) || 96 - is_multicast_ether_addr(vdpasim->buffer + hdr_len)) 96 + if (is_broadcast_ether_addr(net->buffer + hdr_len) || 97 + is_multicast_ether_addr(net->buffer + hdr_len)) 97 98 return true; 98 - if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) 99 + if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN)) 99 100 return true; 100 101 101 102 return false; ··· 193 192 u64_stats_update_end(&net->cq_stats.syncp); 194 193 } 195 194 196 - static void vdpasim_net_work(struct work_struct *work) 195 + static void vdpasim_net_work(struct vdpasim *vdpasim) 197 196 { 198 - struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); 199 197 struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; 200 198 struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; 201 199 struct vdpasim_net *net = sim_to_net(vdpasim); ··· 203 203 u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0; 204 204 int err; 205 205 206 - spin_lock(&vdpasim->lock); 206 + mutex_lock(&vdpasim->mutex); 207 207 208 208 if (!vdpasim->running) 209 209 goto out; ··· 227 227 228 228 ++tx_pkts; 229 229 read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, 230 - vdpasim->buffer, 231 - 
PAGE_SIZE); 230 + net->buffer, PAGE_SIZE); 232 231 233 232 tx_bytes += read; 234 233 ··· 246 247 } 247 248 248 249 write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, 249 - vdpasim->buffer, read); 250 + net->buffer, read); 250 251 if (write <= 0) { 251 252 ++rx_errors; 252 253 break; ··· 259 260 vdpasim_net_complete(rxq, write); 260 261 261 262 if (tx_pkts > 4) { 262 - schedule_work(&vdpasim->work); 263 + vdpasim_schedule_work(vdpasim); 263 264 goto out; 264 265 } 265 266 } 266 267 267 268 out: 268 - spin_unlock(&vdpasim->lock); 269 + mutex_unlock(&vdpasim->mutex); 269 270 270 271 u64_stats_update_begin(&net->tx_stats.syncp); 271 272 net->tx_stats.pkts += tx_pkts; ··· 428 429 vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); 429 430 } 430 431 432 + static void vdpasim_net_free(struct vdpasim *vdpasim) 433 + { 434 + struct vdpasim_net *net = sim_to_net(vdpasim); 435 + 436 + kvfree(net->buffer); 437 + } 438 + 431 439 static void vdpasim_net_mgmtdev_release(struct device *dev) 432 440 { 433 441 } ··· 464 458 dev_attr.get_config = vdpasim_net_get_config; 465 459 dev_attr.work_fn = vdpasim_net_work; 466 460 dev_attr.get_stats = vdpasim_net_get_stats; 467 - dev_attr.buffer_size = PAGE_SIZE; 461 + dev_attr.free = vdpasim_net_free; 468 462 469 463 simdev = vdpasim_create(&dev_attr, config); 470 464 if (IS_ERR(simdev)) ··· 477 471 u64_stats_init(&net->tx_stats.syncp); 478 472 u64_stats_init(&net->rx_stats.syncp); 479 473 u64_stats_init(&net->cq_stats.syncp); 474 + 475 + net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); 476 + if (!net->buffer) { 477 + ret = -ENOMEM; 478 + goto reg_err; 479 + } 480 480 481 481 /* 482 482 * Initialization must be completed before this call, since it can

+351 -63

drivers/vdpa/vdpa_user/vduse_dev.c

··· 37 37 #define DRV_LICENSE "GPL v2" 38 38 39 39 #define VDUSE_DEV_MAX (1U << MINORBITS) 40 + #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024) 41 + #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024) 40 42 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) 41 - #define VDUSE_IOVA_SIZE (128 * 1024 * 1024) 43 + /* 128 MB reserved for virtqueue creation */ 44 + #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024) 42 45 #define VDUSE_MSG_DEFAULT_TIMEOUT 30 46 + 47 + #define IRQ_UNBOUND -1 43 48 44 49 struct vduse_virtqueue { 45 50 u16 index; ··· 62 57 struct vdpa_callback cb; 63 58 struct work_struct inject; 64 59 struct work_struct kick; 60 + int irq_effective_cpu; 61 + struct cpumask irq_affinity; 62 + struct kobject kobj; 65 63 }; 66 64 67 65 struct vduse_dev; ··· 84 76 struct vduse_dev { 85 77 struct vduse_vdpa *vdev; 86 78 struct device *dev; 87 - struct vduse_virtqueue *vqs; 79 + struct vduse_virtqueue **vqs; 88 80 struct vduse_iova_domain *domain; 89 81 char *name; 90 82 struct mutex lock; ··· 114 106 u32 vq_align; 115 107 struct vduse_umem *umem; 116 108 struct mutex mem_lock; 109 + unsigned int bounce_size; 110 + struct mutex domain_lock; 117 111 }; 118 112 119 113 struct vduse_dev_msg { ··· 138 128 static struct cdev vduse_ctrl_cdev; 139 129 static struct cdev vduse_cdev; 140 130 static struct workqueue_struct *vduse_irq_wq; 131 + static struct workqueue_struct *vduse_irq_bound_wq; 141 132 142 133 static u32 allowed_device_id[] = { 143 134 VIRTIO_ID_BLOCK, ··· 430 419 struct vduse_iova_domain *domain = dev->domain; 431 420 432 421 /* The coherent mappings are handled in vduse_dev_free_coherent() */ 433 - if (domain->bounce_map) 422 + if (domain && domain->bounce_map) 434 423 vduse_domain_reset_bounce_map(domain); 435 424 436 425 down_write(&dev->rwsem); ··· 445 434 flush_work(&dev->inject); 446 435 447 436 for (i = 0; i < dev->vq_num; i++) { 448 - struct vduse_virtqueue *vq = &dev->vqs[i]; 437 + struct vduse_virtqueue *vq = dev->vqs[i]; 449 438 450 439 
vq->ready = false; 451 440 vq->desc_addr = 0; ··· 464 453 spin_lock(&vq->irq_lock); 465 454 vq->cb.callback = NULL; 466 455 vq->cb.private = NULL; 456 + vq->cb.trigger = NULL; 467 457 spin_unlock(&vq->irq_lock); 468 458 flush_work(&vq->inject); 469 459 flush_work(&vq->kick); ··· 478 466 u64 device_area) 479 467 { 480 468 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 481 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 469 + struct vduse_virtqueue *vq = dev->vqs[idx]; 482 470 483 471 vq->desc_addr = desc_area; 484 472 vq->driver_addr = driver_area; ··· 512 500 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) 513 501 { 514 502 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 515 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 503 + struct vduse_virtqueue *vq = dev->vqs[idx]; 516 504 517 505 if (!eventfd_signal_allowed()) { 518 506 schedule_work(&vq->kick); ··· 525 513 struct vdpa_callback *cb) 526 514 { 527 515 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 528 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 516 + struct vduse_virtqueue *vq = dev->vqs[idx]; 529 517 530 518 spin_lock(&vq->irq_lock); 531 519 vq->cb.callback = cb->callback; 532 520 vq->cb.private = cb->private; 521 + vq->cb.trigger = cb->trigger; 533 522 spin_unlock(&vq->irq_lock); 534 523 } 535 524 536 525 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) 537 526 { 538 527 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 539 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 528 + struct vduse_virtqueue *vq = dev->vqs[idx]; 540 529 541 530 vq->num = num; 542 531 } ··· 546 533 u16 idx, bool ready) 547 534 { 548 535 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 549 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 536 + struct vduse_virtqueue *vq = dev->vqs[idx]; 550 537 551 538 vq->ready = ready; 552 539 } ··· 554 541 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) 555 542 { 556 543 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 557 - struct vduse_virtqueue *vq 
= &dev->vqs[idx]; 544 + struct vduse_virtqueue *vq = dev->vqs[idx]; 558 545 559 546 return vq->ready; 560 547 } ··· 563 550 const struct vdpa_vq_state *state) 564 551 { 565 552 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 566 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 553 + struct vduse_virtqueue *vq = dev->vqs[idx]; 567 554 568 555 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { 569 556 vq->state.packed.last_avail_counter = ··· 582 569 struct vdpa_vq_state *state) 583 570 { 584 571 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 585 - struct vduse_virtqueue *vq = &dev->vqs[idx]; 572 + struct vduse_virtqueue *vq = dev->vqs[idx]; 586 573 587 574 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) 588 575 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); ··· 637 624 int i; 638 625 639 626 for (i = 0; i < dev->vq_num; i++) 640 - if (num_max < dev->vqs[i].num_max) 641 - num_max = dev->vqs[i].num_max; 627 + if (num_max < dev->vqs[i]->num_max) 628 + num_max = dev->vqs[i]->num_max; 642 629 643 630 return num_max; 644 631 } ··· 721 708 return dev->generation; 722 709 } 723 710 711 + static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx, 712 + const struct cpumask *cpu_mask) 713 + { 714 + struct vduse_dev *dev = vdpa_to_vduse(vdpa); 715 + 716 + cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask); 717 + return 0; 718 + } 719 + 720 + static const struct cpumask * 721 + vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx) 722 + { 723 + struct vduse_dev *dev = vdpa_to_vduse(vdpa); 724 + 725 + return &dev->vqs[idx]->irq_affinity; 726 + } 727 + 724 728 static int vduse_vdpa_set_map(struct vdpa_device *vdpa, 725 729 unsigned int asid, 726 730 struct vhost_iotlb *iotlb) ··· 788 758 .get_config = vduse_vdpa_get_config, 789 759 .set_config = vduse_vdpa_set_config, 790 760 .get_generation = vduse_vdpa_get_generation, 761 + .set_vq_affinity = vduse_vdpa_set_vq_affinity, 762 + .get_vq_affinity = vduse_vdpa_get_vq_affinity, 791 
763 .reset = vduse_vdpa_reset, 792 764 .set_map = vduse_vdpa_set_map, 793 765 .free = vduse_vdpa_free, ··· 895 863 return -EINVAL; 896 864 897 865 index = array_index_nospec(eventfd->index, dev->vq_num); 898 - vq = &dev->vqs[index]; 866 + vq = dev->vqs[index]; 899 867 if (eventfd->fd >= 0) { 900 868 ctx = eventfd_ctx_fdget(eventfd->fd); 901 869 if (IS_ERR(ctx)) ··· 921 889 int i; 922 890 923 891 for (i = 0; i < dev->vq_num; i++) 924 - if (!dev->vqs[i].num_max) 892 + if (!dev->vqs[i]->num_max) 925 893 return false; 926 894 927 895 return true; ··· 948 916 spin_unlock_irq(&vq->irq_lock); 949 917 } 950 918 919 + static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq) 920 + { 921 + bool signal = false; 922 + 923 + if (!vq->cb.trigger) 924 + return false; 925 + 926 + spin_lock_irq(&vq->irq_lock); 927 + if (vq->ready && vq->cb.trigger) { 928 + eventfd_signal(vq->cb.trigger, 1); 929 + signal = true; 930 + } 931 + spin_unlock_irq(&vq->irq_lock); 932 + 933 + return signal; 934 + } 935 + 951 936 static int vduse_dev_queue_irq_work(struct vduse_dev *dev, 952 - struct work_struct *irq_work) 937 + struct work_struct *irq_work, 938 + int irq_effective_cpu) 953 939 { 954 940 int ret = -EINVAL; 955 941 ··· 976 926 goto unlock; 977 927 978 928 ret = 0; 979 - queue_work(vduse_irq_wq, irq_work); 929 + if (irq_effective_cpu == IRQ_UNBOUND) 930 + queue_work(vduse_irq_wq, irq_work); 931 + else 932 + queue_work_on(irq_effective_cpu, 933 + vduse_irq_bound_wq, irq_work); 980 934 unlock: 981 935 up_read(&dev->rwsem); 982 936 ··· 998 944 goto unlock; 999 945 1000 946 ret = -EINVAL; 947 + if (!dev->domain) 948 + goto unlock; 949 + 1001 950 if (dev->umem->iova != iova || size != dev->domain->bounce_size) 1002 951 goto unlock; 1003 952 ··· 1027 970 unsigned long npages, lock_limit; 1028 971 int ret; 1029 972 1030 - if (!dev->domain->bounce_map || 973 + if (!dev->domain || !dev->domain->bounce_map || 1031 974 size != dev->domain->bounce_size || 1032 975 iova != 0 || uaddr & ~PAGE_MASK) 
1033 976 return -EINVAL; ··· 1086 1029 return ret; 1087 1030 } 1088 1031 1032 + static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq) 1033 + { 1034 + int curr_cpu = vq->irq_effective_cpu; 1035 + 1036 + while (true) { 1037 + curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity); 1038 + if (cpu_online(curr_cpu)) 1039 + break; 1040 + 1041 + if (curr_cpu >= nr_cpu_ids) 1042 + curr_cpu = IRQ_UNBOUND; 1043 + } 1044 + 1045 + vq->irq_effective_cpu = curr_cpu; 1046 + } 1047 + 1089 1048 static long vduse_dev_ioctl(struct file *file, unsigned int cmd, 1090 1049 unsigned long arg) 1091 1050 { ··· 1117 1044 struct vduse_iotlb_entry entry; 1118 1045 struct vhost_iotlb_map *map; 1119 1046 struct vdpa_map_file *map_file; 1120 - struct vduse_iova_domain *domain = dev->domain; 1121 1047 struct file *f = NULL; 1122 1048 1123 1049 ret = -EFAULT; ··· 1127 1055 if (entry.start > entry.last) 1128 1056 break; 1129 1057 1130 - spin_lock(&domain->iotlb_lock); 1131 - map = vhost_iotlb_itree_first(domain->iotlb, 1058 + mutex_lock(&dev->domain_lock); 1059 + if (!dev->domain) { 1060 + mutex_unlock(&dev->domain_lock); 1061 + break; 1062 + } 1063 + spin_lock(&dev->domain->iotlb_lock); 1064 + map = vhost_iotlb_itree_first(dev->domain->iotlb, 1132 1065 entry.start, entry.last); 1133 1066 if (map) { 1134 1067 map_file = (struct vdpa_map_file *)map->opaque; ··· 1143 1066 entry.last = map->last; 1144 1067 entry.perm = map->perm; 1145 1068 } 1146 - spin_unlock(&domain->iotlb_lock); 1069 + spin_unlock(&dev->domain->iotlb_lock); 1070 + mutex_unlock(&dev->domain_lock); 1147 1071 ret = -EINVAL; 1148 1072 if (!f) 1149 1073 break; ··· 1189 1111 break; 1190 1112 } 1191 1113 case VDUSE_DEV_INJECT_CONFIG_IRQ: 1192 - ret = vduse_dev_queue_irq_work(dev, &dev->inject); 1114 + ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND); 1193 1115 break; 1194 1116 case VDUSE_VQ_SETUP: { 1195 1117 struct vduse_vq_config config; ··· 1208 1130 break; 1209 1131 1210 1132 index = 
array_index_nospec(config.index, dev->vq_num); 1211 - dev->vqs[index].num_max = config.max_size; 1133 + dev->vqs[index]->num_max = config.max_size; 1212 1134 ret = 0; 1213 1135 break; 1214 1136 } ··· 1226 1148 break; 1227 1149 1228 1150 index = array_index_nospec(vq_info.index, dev->vq_num); 1229 - vq = &dev->vqs[index]; 1151 + vq = dev->vqs[index]; 1230 1152 vq_info.desc_addr = vq->desc_addr; 1231 1153 vq_info.driver_addr = vq->driver_addr; 1232 1154 vq_info.device_addr = vq->device_addr; ··· 1275 1197 if (index >= dev->vq_num) 1276 1198 break; 1277 1199 1200 + ret = 0; 1278 1201 index = array_index_nospec(index, dev->vq_num); 1279 - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); 1202 + if (!vduse_vq_signal_irqfd(dev->vqs[index])) { 1203 + vduse_vq_update_effective_cpu(dev->vqs[index]); 1204 + ret = vduse_dev_queue_irq_work(dev, 1205 + &dev->vqs[index]->inject, 1206 + dev->vqs[index]->irq_effective_cpu); 1207 + } 1280 1208 break; 1281 1209 } 1282 1210 case VDUSE_IOTLB_REG_UMEM: { ··· 1297 1213 sizeof(umem.reserved))) 1298 1214 break; 1299 1215 1216 + mutex_lock(&dev->domain_lock); 1300 1217 ret = vduse_dev_reg_umem(dev, umem.iova, 1301 1218 umem.uaddr, umem.size); 1219 + mutex_unlock(&dev->domain_lock); 1302 1220 break; 1303 1221 } 1304 1222 case VDUSE_IOTLB_DEREG_UMEM: { ··· 1314 1228 if (!is_mem_zero((const char *)umem.reserved, 1315 1229 sizeof(umem.reserved))) 1316 1230 break; 1317 - 1231 + mutex_lock(&dev->domain_lock); 1318 1232 ret = vduse_dev_dereg_umem(dev, umem.iova, 1319 1233 umem.size); 1234 + mutex_unlock(&dev->domain_lock); 1320 1235 break; 1321 1236 } 1322 1237 case VDUSE_IOTLB_GET_INFO: { 1323 1238 struct vduse_iova_info info; 1324 1239 struct vhost_iotlb_map *map; 1325 - struct vduse_iova_domain *domain = dev->domain; 1326 1240 1327 1241 ret = -EFAULT; 1328 1242 if (copy_from_user(&info, argp, sizeof(info))) ··· 1336 1250 sizeof(info.reserved))) 1337 1251 break; 1338 1252 1339 - spin_lock(&domain->iotlb_lock); 1340 - map = 
vhost_iotlb_itree_first(domain->iotlb, 1253 + mutex_lock(&dev->domain_lock); 1254 + if (!dev->domain) { 1255 + mutex_unlock(&dev->domain_lock); 1256 + break; 1257 + } 1258 + spin_lock(&dev->domain->iotlb_lock); 1259 + map = vhost_iotlb_itree_first(dev->domain->iotlb, 1341 1260 info.start, info.last); 1342 1261 if (map) { 1343 1262 info.start = map->start; 1344 1263 info.last = map->last; 1345 1264 info.capability = 0; 1346 - if (domain->bounce_map && map->start == 0 && 1347 - map->last == domain->bounce_size - 1) 1265 + if (dev->domain->bounce_map && map->start == 0 && 1266 + map->last == dev->domain->bounce_size - 1) 1348 1267 info.capability |= VDUSE_IOVA_CAP_UMEM; 1349 1268 } 1350 - spin_unlock(&domain->iotlb_lock); 1269 + spin_unlock(&dev->domain->iotlb_lock); 1270 + mutex_unlock(&dev->domain_lock); 1351 1271 if (!map) 1352 1272 break; 1353 1273 ··· 1376 1284 { 1377 1285 struct vduse_dev *dev = file->private_data; 1378 1286 1379 - vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); 1287 + mutex_lock(&dev->domain_lock); 1288 + if (dev->domain) 1289 + vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); 1290 + mutex_unlock(&dev->domain_lock); 1380 1291 spin_lock(&dev->msg_lock); 1381 1292 /* Make sure the inflight messages can processed after reconncection */ 1382 1293 list_splice_init(&dev->recv_list, &dev->send_list); ··· 1434 1339 .llseek = noop_llseek, 1435 1340 }; 1436 1341 1342 + static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf) 1343 + { 1344 + return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity)); 1345 + } 1346 + 1347 + static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq, 1348 + const char *buf, size_t count) 1349 + { 1350 + cpumask_var_t new_value; 1351 + int ret; 1352 + 1353 + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) 1354 + return -ENOMEM; 1355 + 1356 + ret = cpumask_parse(buf, new_value); 1357 + if (ret) 1358 + goto free_mask; 1359 + 1360 + ret = -EINVAL; 1361 + if 
(!cpumask_intersects(new_value, cpu_online_mask)) 1362 + goto free_mask; 1363 + 1364 + cpumask_copy(&vq->irq_affinity, new_value); 1365 + ret = count; 1366 + free_mask: 1367 + free_cpumask_var(new_value); 1368 + return ret; 1369 + } 1370 + 1371 + struct vq_sysfs_entry { 1372 + struct attribute attr; 1373 + ssize_t (*show)(struct vduse_virtqueue *vq, char *buf); 1374 + ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf, 1375 + size_t count); 1376 + }; 1377 + 1378 + static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity); 1379 + 1380 + static struct attribute *vq_attrs[] = { 1381 + &irq_cb_affinity_attr.attr, 1382 + NULL, 1383 + }; 1384 + ATTRIBUTE_GROUPS(vq); 1385 + 1386 + static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr, 1387 + char *buf) 1388 + { 1389 + struct vduse_virtqueue *vq = container_of(kobj, 1390 + struct vduse_virtqueue, kobj); 1391 + struct vq_sysfs_entry *entry = container_of(attr, 1392 + struct vq_sysfs_entry, attr); 1393 + 1394 + if (!entry->show) 1395 + return -EIO; 1396 + 1397 + return entry->show(vq, buf); 1398 + } 1399 + 1400 + static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr, 1401 + const char *buf, size_t count) 1402 + { 1403 + struct vduse_virtqueue *vq = container_of(kobj, 1404 + struct vduse_virtqueue, kobj); 1405 + struct vq_sysfs_entry *entry = container_of(attr, 1406 + struct vq_sysfs_entry, attr); 1407 + 1408 + if (!entry->store) 1409 + return -EIO; 1410 + 1411 + return entry->store(vq, buf, count); 1412 + } 1413 + 1414 + static const struct sysfs_ops vq_sysfs_ops = { 1415 + .show = vq_attr_show, 1416 + .store = vq_attr_store, 1417 + }; 1418 + 1419 + static void vq_release(struct kobject *kobj) 1420 + { 1421 + struct vduse_virtqueue *vq = container_of(kobj, 1422 + struct vduse_virtqueue, kobj); 1423 + kfree(vq); 1424 + } 1425 + 1426 + static const struct kobj_type vq_type = { 1427 + .release = vq_release, 1428 + .sysfs_ops = &vq_sysfs_ops, 1429 + 
.default_groups = vq_groups, 1430 + }; 1431 + 1432 + static void vduse_dev_deinit_vqs(struct vduse_dev *dev) 1433 + { 1434 + int i; 1435 + 1436 + if (!dev->vqs) 1437 + return; 1438 + 1439 + for (i = 0; i < dev->vq_num; i++) 1440 + kobject_put(&dev->vqs[i]->kobj); 1441 + kfree(dev->vqs); 1442 + } 1443 + 1444 + static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) 1445 + { 1446 + int ret, i; 1447 + 1448 + dev->vq_align = vq_align; 1449 + dev->vq_num = vq_num; 1450 + dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); 1451 + if (!dev->vqs) 1452 + return -ENOMEM; 1453 + 1454 + for (i = 0; i < vq_num; i++) { 1455 + dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL); 1456 + if (!dev->vqs[i]) { 1457 + ret = -ENOMEM; 1458 + goto err; 1459 + } 1460 + 1461 + dev->vqs[i]->index = i; 1462 + dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND; 1463 + INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject); 1464 + INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work); 1465 + spin_lock_init(&dev->vqs[i]->kick_lock); 1466 + spin_lock_init(&dev->vqs[i]->irq_lock); 1467 + cpumask_setall(&dev->vqs[i]->irq_affinity); 1468 + 1469 + kobject_init(&dev->vqs[i]->kobj, &vq_type); 1470 + ret = kobject_add(&dev->vqs[i]->kobj, 1471 + &dev->dev->kobj, "vq%d", i); 1472 + if (ret) { 1473 + kfree(dev->vqs[i]); 1474 + goto err; 1475 + } 1476 + } 1477 + 1478 + return 0; 1479 + err: 1480 + while (i--) 1481 + kobject_put(&dev->vqs[i]->kobj); 1482 + kfree(dev->vqs); 1483 + dev->vqs = NULL; 1484 + return ret; 1485 + } 1486 + 1437 1487 static struct vduse_dev *vduse_dev_create(void) 1438 1488 { 1439 1489 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); ··· 1588 1348 1589 1349 mutex_init(&dev->lock); 1590 1350 mutex_init(&dev->mem_lock); 1351 + mutex_init(&dev->domain_lock); 1591 1352 spin_lock_init(&dev->msg_lock); 1592 1353 INIT_LIST_HEAD(&dev->send_list); 1593 1354 INIT_LIST_HEAD(&dev->recv_list); ··· 1637 1396 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), 
dev->minor)); 1638 1397 idr_remove(&vduse_idr, dev->minor); 1639 1398 kvfree(dev->config); 1640 - kfree(dev->vqs); 1641 - vduse_domain_destroy(dev->domain); 1399 + vduse_dev_deinit_vqs(dev); 1400 + if (dev->domain) 1401 + vduse_domain_destroy(dev->domain); 1642 1402 kfree(dev->name); 1643 1403 vduse_dev_destroy(dev); 1644 1404 module_put(THIS_MODULE); ··· 1718 1476 1719 1477 static DEVICE_ATTR_RW(msg_timeout); 1720 1478 1479 + static ssize_t bounce_size_show(struct device *device, 1480 + struct device_attribute *attr, char *buf) 1481 + { 1482 + struct vduse_dev *dev = dev_get_drvdata(device); 1483 + 1484 + return sysfs_emit(buf, "%u\n", dev->bounce_size); 1485 + } 1486 + 1487 + static ssize_t bounce_size_store(struct device *device, 1488 + struct device_attribute *attr, 1489 + const char *buf, size_t count) 1490 + { 1491 + struct vduse_dev *dev = dev_get_drvdata(device); 1492 + unsigned int bounce_size; 1493 + int ret; 1494 + 1495 + ret = -EPERM; 1496 + mutex_lock(&dev->domain_lock); 1497 + if (dev->domain) 1498 + goto unlock; 1499 + 1500 + ret = kstrtouint(buf, 10, &bounce_size); 1501 + if (ret < 0) 1502 + goto unlock; 1503 + 1504 + ret = -EINVAL; 1505 + if (bounce_size > VDUSE_MAX_BOUNCE_SIZE || 1506 + bounce_size < VDUSE_MIN_BOUNCE_SIZE) 1507 + goto unlock; 1508 + 1509 + dev->bounce_size = bounce_size & PAGE_MASK; 1510 + ret = count; 1511 + unlock: 1512 + mutex_unlock(&dev->domain_lock); 1513 + return ret; 1514 + } 1515 + 1516 + static DEVICE_ATTR_RW(bounce_size); 1517 + 1721 1518 static struct attribute *vduse_dev_attrs[] = { 1722 1519 &dev_attr_msg_timeout.attr, 1520 + &dev_attr_bounce_size.attr, 1723 1521 NULL 1724 1522 }; 1725 1523 ··· 1768 1486 static int vduse_create_dev(struct vduse_dev_config *config, 1769 1487 void *config_buf, u64 api_version) 1770 1488 { 1771 - int i, ret; 1489 + int ret; 1772 1490 struct vduse_dev *dev; 1773 1491 1774 1492 ret = -EEXIST; ··· 1788 1506 if (!dev->name) 1789 1507 goto err_str; 1790 1508 1791 - dev->domain = 
vduse_domain_create(VDUSE_IOVA_SIZE - 1, 1792 - VDUSE_BOUNCE_SIZE); 1793 - if (!dev->domain) 1794 - goto err_domain; 1795 - 1509 + dev->bounce_size = VDUSE_BOUNCE_SIZE; 1796 1510 dev->config = config_buf; 1797 1511 dev->config_size = config->config_size; 1798 - dev->vq_align = config->vq_align; 1799 - dev->vq_num = config->vq_num; 1800 - dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); 1801 - if (!dev->vqs) 1802 - goto err_vqs; 1803 - 1804 - for (i = 0; i < dev->vq_num; i++) { 1805 - dev->vqs[i].index = i; 1806 - INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject); 1807 - INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work); 1808 - spin_lock_init(&dev->vqs[i].kick_lock); 1809 - spin_lock_init(&dev->vqs[i].irq_lock); 1810 - } 1811 1512 1812 1513 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); 1813 1514 if (ret < 0) ··· 1805 1540 ret = PTR_ERR(dev->dev); 1806 1541 goto err_dev; 1807 1542 } 1543 + 1544 + ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num); 1545 + if (ret) 1546 + goto err_vqs; 1547 + 1808 1548 __module_get(THIS_MODULE); 1809 1549 1810 1550 return 0; 1551 + err_vqs: 1552 + device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); 1811 1553 err_dev: 1812 1554 idr_remove(&vduse_idr, dev->minor); 1813 1555 err_idr: 1814 - kfree(dev->vqs); 1815 - err_vqs: 1816 - vduse_domain_destroy(dev->domain); 1817 - err_domain: 1818 1556 kfree(dev->name); 1819 1557 err_str: 1820 1558 vduse_dev_destroy(dev); ··· 1984 1716 if (ret) 1985 1717 return ret; 1986 1718 1719 + mutex_lock(&dev->domain_lock); 1720 + if (!dev->domain) 1721 + dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, 1722 + dev->bounce_size); 1723 + mutex_unlock(&dev->domain_lock); 1724 + if (!dev->domain) { 1725 + put_device(&dev->vdev->vdpa.dev); 1726 + return -ENOMEM; 1727 + } 1728 + 1987 1729 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); 1988 1730 if (ret) { 1989 1731 put_device(&dev->vdev->vdpa.dev); 1732 + 
mutex_lock(&dev->domain_lock); 1733 + vduse_domain_destroy(dev->domain); 1734 + dev->domain = NULL; 1735 + mutex_unlock(&dev->domain_lock); 1990 1736 return ret; 1991 1737 } 1992 1738 ··· 2106 1824 if (ret) 2107 1825 goto err_cdev; 2108 1826 1827 + ret = -ENOMEM; 2109 1828 vduse_irq_wq = alloc_workqueue("vduse-irq", 2110 1829 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0); 2111 - if (!vduse_irq_wq) { 2112 - ret = -ENOMEM; 1830 + if (!vduse_irq_wq) 2113 1831 goto err_wq; 2114 - } 1832 + 1833 + vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0); 1834 + if (!vduse_irq_bound_wq) 1835 + goto err_bound_wq; 2115 1836 2116 1837 ret = vduse_domain_init(); 2117 1838 if (ret) ··· 2128 1843 err_mgmtdev: 2129 1844 vduse_domain_exit(); 2130 1845 err_domain: 1846 + destroy_workqueue(vduse_irq_bound_wq); 1847 + err_bound_wq: 2131 1848 destroy_workqueue(vduse_irq_wq); 2132 1849 err_wq: 2133 1850 cdev_del(&vduse_cdev); ··· 2149 1862 { 2150 1863 vduse_mgmtdev_exit(); 2151 1864 vduse_domain_exit(); 1865 + destroy_workqueue(vduse_irq_bound_wq); 2152 1866 destroy_workqueue(vduse_irq_wq); 2153 1867 cdev_del(&vduse_cdev); 2154 1868 device_destroy(vduse_class, vduse_major);

+56 -46

drivers/vhost/scsi.c

··· 229 229 struct iov_iter out_iter; 230 230 }; 231 231 232 - /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ 232 + /* 233 + * Global mutex to protect vhost_scsi TPG list for vhost IOCTLs and LIO 234 + * configfs management operations. 235 + */ 233 236 static DEFINE_MUTEX(vhost_scsi_mutex); 234 237 static LIST_HEAD(vhost_scsi_list); 235 238 ··· 1504 1501 * vhost_scsi_tpg with an active struct vhost_scsi_nexus 1505 1502 * 1506 1503 * The lock nesting rule is: 1507 - * vhost_scsi_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex 1504 + * vs->dev.mutex -> vhost_scsi_mutex -> tpg->tv_tpg_mutex -> vq->mutex 1508 1505 */ 1509 1506 static int 1510 1507 vhost_scsi_set_endpoint(struct vhost_scsi *vs, ··· 1518 1515 int index, ret, i, len; 1519 1516 bool match = false; 1520 1517 1521 - mutex_lock(&vhost_scsi_mutex); 1522 1518 mutex_lock(&vs->dev.mutex); 1523 1519 1524 1520 /* Verify that ring has been setup correctly. */ ··· 1538 1536 if (vs->vs_tpg) 1539 1537 memcpy(vs_tpg, vs->vs_tpg, len); 1540 1538 1539 + mutex_lock(&vhost_scsi_mutex); 1541 1540 list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) { 1542 1541 mutex_lock(&tpg->tv_tpg_mutex); 1543 1542 if (!tpg->tpg_nexus) { ··· 1554 1551 if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { 1555 1552 if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { 1556 1553 mutex_unlock(&tpg->tv_tpg_mutex); 1554 + mutex_unlock(&vhost_scsi_mutex); 1557 1555 ret = -EEXIST; 1558 1556 goto undepend; 1559 1557 } ··· 1569 1565 if (ret) { 1570 1566 pr_warn("target_depend_item() failed: %d\n", ret); 1571 1567 mutex_unlock(&tpg->tv_tpg_mutex); 1568 + mutex_unlock(&vhost_scsi_mutex); 1572 1569 goto undepend; 1573 1570 } 1574 1571 tpg->tv_tpg_vhost_count++; ··· 1579 1574 } 1580 1575 mutex_unlock(&tpg->tv_tpg_mutex); 1581 1576 } 1577 + mutex_unlock(&vhost_scsi_mutex); 1582 1578 1583 1579 if (match) { 1584 1580 memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, ··· 1635 1629 kfree(vs_tpg); 1636 1630 out: 1637 1631 
mutex_unlock(&vs->dev.mutex); 1638 - mutex_unlock(&vhost_scsi_mutex); 1639 1632 return ret; 1640 1633 } 1641 1634 ··· 1650 1645 int index, ret, i; 1651 1646 u8 target; 1652 1647 1653 - mutex_lock(&vhost_scsi_mutex); 1654 1648 mutex_lock(&vs->dev.mutex); 1655 1649 /* Verify that ring has been setup correctly. */ 1656 1650 for (index = 0; index < vs->dev.nvqs; ++index) { ··· 1670 1666 if (!tpg) 1671 1667 continue; 1672 1668 1673 - mutex_lock(&tpg->tv_tpg_mutex); 1674 1669 tv_tport = tpg->tport; 1675 1670 if (!tv_tport) { 1676 1671 ret = -ENODEV; 1677 - goto err_tpg; 1672 + goto err_dev; 1678 1673 } 1679 1674 1680 1675 if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { ··· 1682 1679 tv_tport->tport_name, tpg->tport_tpgt, 1683 1680 t->vhost_wwpn, t->vhost_tpgt); 1684 1681 ret = -EINVAL; 1685 - goto err_tpg; 1682 + goto err_dev; 1686 1683 } 1684 + match = true; 1685 + } 1686 + if (!match) 1687 + goto free_vs_tpg; 1688 + 1689 + /* Prevent new cmds from starting and accessing the tpgs/sessions */ 1690 + for (i = 0; i < vs->dev.nvqs; i++) { 1691 + vq = &vs->vqs[i].vq; 1692 + mutex_lock(&vq->mutex); 1693 + vhost_vq_set_backend(vq, NULL); 1694 + mutex_unlock(&vq->mutex); 1695 + } 1696 + /* Make sure cmds are not running before tearing them down. */ 1697 + vhost_scsi_flush(vs); 1698 + 1699 + for (i = 0; i < vs->dev.nvqs; i++) { 1700 + vq = &vs->vqs[i].vq; 1701 + vhost_scsi_destroy_vq_cmds(vq); 1702 + } 1703 + 1704 + /* 1705 + * We can now release our hold on the tpg and sessions and userspace 1706 + * can free them after this point. 
1707 + */ 1708 + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { 1709 + target = i; 1710 + tpg = vs->vs_tpg[target]; 1711 + if (!tpg) 1712 + continue; 1713 + 1714 + mutex_lock(&tpg->tv_tpg_mutex); 1715 + 1687 1716 tpg->tv_tpg_vhost_count--; 1688 1717 tpg->vhost_scsi = NULL; 1689 1718 vs->vs_tpg[target] = NULL; 1690 - match = true; 1719 + 1691 1720 mutex_unlock(&tpg->tv_tpg_mutex); 1692 - /* 1693 - * Release se_tpg->tpg_group.cg_item configfs dependency now 1694 - * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. 1695 - */ 1721 + 1696 1722 se_tpg = &tpg->se_tpg; 1697 1723 target_undepend_item(&se_tpg->tpg_group.cg_item); 1698 1724 } 1699 - if (match) { 1700 - for (i = 0; i < vs->dev.nvqs; i++) { 1701 - vq = &vs->vqs[i].vq; 1702 - mutex_lock(&vq->mutex); 1703 - vhost_vq_set_backend(vq, NULL); 1704 - mutex_unlock(&vq->mutex); 1705 - } 1706 - /* Make sure cmds are not running before tearing them down. */ 1707 - vhost_scsi_flush(vs); 1708 1725 1709 - for (i = 0; i < vs->dev.nvqs; i++) { 1710 - vq = &vs->vqs[i].vq; 1711 - vhost_scsi_destroy_vq_cmds(vq); 1712 - } 1713 - } 1726 + free_vs_tpg: 1714 1727 /* 1715 1728 * Act as synchronize_rcu to make sure access to 1716 1729 * old vs->vs_tpg is finished. 
··· 1736 1717 vs->vs_tpg = NULL; 1737 1718 WARN_ON(vs->vs_events_nr); 1738 1719 mutex_unlock(&vs->dev.mutex); 1739 - mutex_unlock(&vhost_scsi_mutex); 1740 1720 return 0; 1741 1721 1742 - err_tpg: 1743 - mutex_unlock(&tpg->tv_tpg_mutex); 1744 1722 err_dev: 1745 1723 mutex_unlock(&vs->dev.mutex); 1746 - mutex_unlock(&vhost_scsi_mutex); 1747 1724 return ret; 1748 1725 } 1749 1726 ··· 1980 1965 if (!vs) 1981 1966 return; 1982 1967 1983 - mutex_lock(&vs->dev.mutex); 1984 - 1985 1968 if (plug) 1986 1969 reason = VIRTIO_SCSI_EVT_RESET_RESCAN; 1987 1970 else ··· 1987 1974 1988 1975 vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; 1989 1976 mutex_lock(&vq->mutex); 1977 + /* 1978 + * We can't queue events if the backend has been cleared, because 1979 + * we could end up queueing an event after the flush. 1980 + */ 1981 + if (!vhost_vq_get_backend(vq)) 1982 + goto unlock; 1983 + 1990 1984 if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG)) 1991 1985 vhost_scsi_send_evt(vs, tpg, lun, 1992 1986 VIRTIO_SCSI_T_TRANSPORT_RESET, reason); 1987 + unlock: 1993 1988 mutex_unlock(&vq->mutex); 1994 - mutex_unlock(&vs->dev.mutex); 1995 1989 } 1996 1990 1997 1991 static void vhost_scsi_hotplug(struct vhost_scsi_tpg *tpg, struct se_lun *lun) ··· 2017 1997 struct vhost_scsi_tpg *tpg = container_of(se_tpg, 2018 1998 struct vhost_scsi_tpg, se_tpg); 2019 1999 2020 - mutex_lock(&vhost_scsi_mutex); 2021 - 2022 2000 mutex_lock(&tpg->tv_tpg_mutex); 2023 2001 tpg->tv_tpg_port_count++; 2024 - mutex_unlock(&tpg->tv_tpg_mutex); 2025 - 2026 2002 vhost_scsi_hotplug(tpg, lun); 2027 - 2028 - mutex_unlock(&vhost_scsi_mutex); 2003 + mutex_unlock(&tpg->tv_tpg_mutex); 2029 2004 2030 2005 return 0; 2031 2006 } ··· 2031 2016 struct vhost_scsi_tpg *tpg = container_of(se_tpg, 2032 2017 struct vhost_scsi_tpg, se_tpg); 2033 2018 2034 - mutex_lock(&vhost_scsi_mutex); 2035 - 2036 2019 mutex_lock(&tpg->tv_tpg_mutex); 2037 2020 tpg->tv_tpg_port_count--; 2038 - mutex_unlock(&tpg->tv_tpg_mutex); 2039 - 2040 2021 
vhost_scsi_hotunplug(tpg, lun); 2041 - 2042 - mutex_unlock(&vhost_scsi_mutex); 2022 + mutex_unlock(&tpg->tv_tpg_mutex); 2043 2023 } 2044 2024 2045 2025 static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store(

+37 -7

drivers/vhost/vdpa.c

··· 219 219 return vdpa_reset(vdpa); 220 220 } 221 221 222 + static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) 223 + { 224 + struct vdpa_device *vdpa = v->vdpa; 225 + const struct vdpa_config_ops *ops = vdpa->config; 226 + 227 + if (!vdpa->use_va || !ops->bind_mm) 228 + return 0; 229 + 230 + return ops->bind_mm(vdpa, v->vdev.mm); 231 + } 232 + 233 + static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) 234 + { 235 + struct vdpa_device *vdpa = v->vdpa; 236 + const struct vdpa_config_ops *ops = vdpa->config; 237 + 238 + if (!vdpa->use_va || !ops->unbind_mm) 239 + return; 240 + 241 + ops->unbind_mm(vdpa); 242 + } 243 + 222 244 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) 223 245 { 224 246 struct vdpa_device *vdpa = v->vdpa; ··· 621 599 if (vq->call_ctx.ctx) { 622 600 cb.callback = vhost_vdpa_virtqueue_cb; 623 601 cb.private = vq; 602 + cb.trigger = vq->call_ctx.ctx; 624 603 } else { 625 604 cb.callback = NULL; 626 605 cb.private = NULL; 606 + cb.trigger = NULL; 627 607 } 628 608 ops->set_vq_cb(vdpa, idx, &cb); 629 609 vhost_vdpa_setup_vq_irq(v, idx); ··· 740 716 break; 741 717 } 742 718 719 + if (r) 720 + goto out; 721 + 722 + switch (cmd) { 723 + case VHOST_SET_OWNER: 724 + r = vhost_vdpa_bind_mm(v); 725 + if (r) 726 + vhost_dev_reset_owner(d, NULL); 727 + break; 728 + } 729 + out: 743 730 mutex_unlock(&d->mutex); 744 731 return r; 745 732 } ··· 886 851 if (!v->in_batch) 887 852 ops->set_map(vdpa, asid, iotlb); 888 853 } 889 - /* If we are in the middle of batch processing, delay the free 890 - * of AS until BATCH_END. 
891 - */ 892 - if (!v->in_batch && !iotlb->nmaps) 893 - vhost_vdpa_remove_as(v, asid); 854 + 894 855 } 895 856 896 857 static int vhost_vdpa_va_map(struct vhost_vdpa *v, ··· 1143 1112 if (v->in_batch && ops->set_map) 1144 1113 ops->set_map(vdpa, asid, iotlb); 1145 1114 v->in_batch = false; 1146 - if (!iotlb->nmaps) 1147 - vhost_vdpa_remove_as(v, asid); 1148 1115 break; 1149 1116 default: 1150 1117 r = -EINVAL; ··· 1316 1287 vhost_vdpa_clean_irq(v); 1317 1288 vhost_vdpa_reset(v); 1318 1289 vhost_dev_stop(&v->vdev); 1290 + vhost_vdpa_unbind_mm(v); 1319 1291 vhost_vdpa_config_put(v); 1320 1292 vhost_vdpa_cleanup(v); 1321 1293 mutex_unlock(&d->mutex);

+2 -4

drivers/vhost/vhost.c

··· 434 434 size_t event __maybe_unused = 435 435 vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 436 436 437 - return sizeof(*vq->avail) + 438 - sizeof(*vq->avail->ring) * num + event; 437 + return size_add(struct_size(vq->avail, ring, num), event); 439 438 } 440 439 441 440 static size_t vhost_get_used_size(struct vhost_virtqueue *vq, ··· 443 444 size_t event __maybe_unused = 444 445 vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 445 446 446 - return sizeof(*vq->used) + 447 - sizeof(*vq->used->ring) * num + event; 447 + return size_add(struct_size(vq->used, ring, num), event); 448 448 } 449 449 450 450 static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,

+150 -41

drivers/vhost/vringh.c

··· 636 636 * @features: the feature bits for this ring. 637 637 * @num: the number of elements. 638 638 * @weak_barriers: true if we only need memory barriers, not I/O. 639 - * @desc: the userpace descriptor pointer. 640 - * @avail: the userpace avail pointer. 641 - * @used: the userpace used pointer. 639 + * @desc: the userspace descriptor pointer. 640 + * @avail: the userspace avail pointer. 641 + * @used: the userspace used pointer. 642 642 * 643 643 * Returns an error if num is invalid: you should check pointers 644 644 * yourself! ··· 911 911 * @features: the feature bits for this ring. 912 912 * @num: the number of elements. 913 913 * @weak_barriers: true if we only need memory barriers, not I/O. 914 - * @desc: the userpace descriptor pointer. 915 - * @avail: the userpace avail pointer. 916 - * @used: the userpace used pointer. 914 + * @desc: the userspace descriptor pointer. 915 + * @avail: the userspace avail pointer. 916 + * @used: the userspace used pointer. 917 917 * 918 918 * Returns an error if num is invalid. 
919 919 */ ··· 1094 1094 1095 1095 #if IS_REACHABLE(CONFIG_VHOST_IOTLB) 1096 1096 1097 + struct iotlb_vec { 1098 + union { 1099 + struct iovec *iovec; 1100 + struct bio_vec *bvec; 1101 + } iov; 1102 + size_t count; 1103 + }; 1104 + 1097 1105 static int iotlb_translate(const struct vringh *vrh, 1098 1106 u64 addr, u64 len, u64 *translated, 1099 - struct bio_vec iov[], 1100 - int iov_size, u32 perm) 1107 + struct iotlb_vec *ivec, u32 perm) 1101 1108 { 1102 1109 struct vhost_iotlb_map *map; 1103 1110 struct vhost_iotlb *iotlb = vrh->iotlb; ··· 1114 1107 spin_lock(vrh->iotlb_lock); 1115 1108 1116 1109 while (len > s) { 1117 - u64 size, pa, pfn; 1110 + uintptr_t io_addr; 1111 + size_t io_len; 1112 + u64 size; 1118 1113 1119 - if (unlikely(ret >= iov_size)) { 1114 + if (unlikely(ret >= ivec->count)) { 1120 1115 ret = -ENOBUFS; 1121 1116 break; 1122 1117 } ··· 1133 1124 } 1134 1125 1135 1126 size = map->size - addr + map->start; 1136 - pa = map->addr + addr - map->start; 1137 - pfn = pa >> PAGE_SHIFT; 1138 - bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size), 1139 - pa & (PAGE_SIZE - 1)); 1127 + io_len = min(len - s, size); 1128 + io_addr = map->addr - map->start + addr; 1129 + 1130 + if (vrh->use_va) { 1131 + struct iovec *iovec = ivec->iov.iovec; 1132 + 1133 + iovec[ret].iov_len = io_len; 1134 + iovec[ret].iov_base = (void __user *)io_addr; 1135 + } else { 1136 + u64 pfn = io_addr >> PAGE_SHIFT; 1137 + struct bio_vec *bvec = ivec->iov.bvec; 1138 + 1139 + bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len, 1140 + io_addr & (PAGE_SIZE - 1)); 1141 + } 1142 + 1140 1143 s += size; 1141 1144 addr += size; 1142 1145 ++ret; ··· 1162 1141 return ret; 1163 1142 } 1164 1143 1144 + #define IOTLB_IOV_STRIDE 16 1145 + 1165 1146 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, 1166 1147 void *src, size_t len) 1167 1148 { 1149 + struct iotlb_vec ivec; 1150 + union { 1151 + struct iovec iovec[IOTLB_IOV_STRIDE]; 1152 + struct bio_vec 
bvec[IOTLB_IOV_STRIDE]; 1153 + } iov; 1168 1154 u64 total_translated = 0; 1169 1155 1156 + ivec.iov.iovec = iov.iovec; 1157 + ivec.count = IOTLB_IOV_STRIDE; 1158 + 1170 1159 while (total_translated < len) { 1171 - struct bio_vec iov[16]; 1172 1160 struct iov_iter iter; 1173 1161 u64 translated; 1174 1162 int ret; 1175 1163 1176 1164 ret = iotlb_translate(vrh, (u64)(uintptr_t)src, 1177 1165 len - total_translated, &translated, 1178 - iov, ARRAY_SIZE(iov), VHOST_MAP_RO); 1166 + &ivec, VHOST_MAP_RO); 1179 1167 if (ret == -ENOBUFS) 1180 - ret = ARRAY_SIZE(iov); 1168 + ret = IOTLB_IOV_STRIDE; 1181 1169 else if (ret < 0) 1182 1170 return ret; 1183 1171 1184 - iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated); 1172 + if (vrh->use_va) { 1173 + iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret, 1174 + translated); 1175 + } else { 1176 + iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret, 1177 + translated); 1178 + } 1185 1179 1186 1180 ret = copy_from_iter(dst, translated, &iter); 1187 1181 if (ret < 0) ··· 1213 1177 static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, 1214 1178 void *src, size_t len) 1215 1179 { 1180 + struct iotlb_vec ivec; 1181 + union { 1182 + struct iovec iovec[IOTLB_IOV_STRIDE]; 1183 + struct bio_vec bvec[IOTLB_IOV_STRIDE]; 1184 + } iov; 1216 1185 u64 total_translated = 0; 1217 1186 1187 + ivec.iov.iovec = iov.iovec; 1188 + ivec.count = IOTLB_IOV_STRIDE; 1189 + 1218 1190 while (total_translated < len) { 1219 - struct bio_vec iov[16]; 1220 1191 struct iov_iter iter; 1221 1192 u64 translated; 1222 1193 int ret; 1223 1194 1224 1195 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, 1225 1196 len - total_translated, &translated, 1226 - iov, ARRAY_SIZE(iov), VHOST_MAP_WO); 1197 + &ivec, VHOST_MAP_WO); 1227 1198 if (ret == -ENOBUFS) 1228 - ret = ARRAY_SIZE(iov); 1199 + ret = IOTLB_IOV_STRIDE; 1229 1200 else if (ret < 0) 1230 1201 return ret; 1231 1202 1232 - iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated); 1203 + if 
(vrh->use_va) { 1204 + iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret, 1205 + translated); 1206 + } else { 1207 + iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret, 1208 + translated); 1209 + } 1233 1210 1234 1211 ret = copy_to_iter(src, translated, &iter); 1235 1212 if (ret < 0) ··· 1259 1210 static inline int getu16_iotlb(const struct vringh *vrh, 1260 1211 u16 *val, const __virtio16 *p) 1261 1212 { 1262 - struct bio_vec iov; 1263 - void *kaddr, *from; 1213 + struct iotlb_vec ivec; 1214 + union { 1215 + struct iovec iovec[1]; 1216 + struct bio_vec bvec[1]; 1217 + } iov; 1218 + __virtio16 tmp; 1264 1219 int ret; 1265 1220 1221 + ivec.iov.iovec = iov.iovec; 1222 + ivec.count = 1; 1223 + 1266 1224 /* Atomic read is needed for getu16 */ 1267 - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, 1268 - &iov, 1, VHOST_MAP_RO); 1225 + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1226 + NULL, &ivec, VHOST_MAP_RO); 1269 1227 if (ret < 0) 1270 1228 return ret; 1271 1229 1272 - kaddr = kmap_atomic(iov.bv_page); 1273 - from = kaddr + iov.bv_offset; 1274 - *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from)); 1275 - kunmap_atomic(kaddr); 1230 + if (vrh->use_va) { 1231 + ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); 1232 + if (ret) 1233 + return ret; 1234 + } else { 1235 + void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); 1236 + void *from = kaddr + ivec.iov.bvec[0].bv_offset; 1237 + 1238 + tmp = READ_ONCE(*(__virtio16 *)from); 1239 + kunmap_local(kaddr); 1240 + } 1241 + 1242 + *val = vringh16_to_cpu(vrh, tmp); 1276 1243 1277 1244 return 0; 1278 1245 } ··· 1296 1231 static inline int putu16_iotlb(const struct vringh *vrh, 1297 1232 __virtio16 *p, u16 val) 1298 1233 { 1299 - struct bio_vec iov; 1300 - void *kaddr, *to; 1234 + struct iotlb_vec ivec; 1235 + union { 1236 + struct iovec iovec; 1237 + struct bio_vec bvec; 1238 + } iov; 1239 + __virtio16 tmp; 1301 1240 int ret; 1302 1241 1242 + ivec.iov.iovec = 
&iov.iovec; 1243 + ivec.count = 1; 1244 + 1303 1245 /* Atomic write is needed for putu16 */ 1304 - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, 1305 - &iov, 1, VHOST_MAP_WO); 1246 + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1247 + NULL, &ivec, VHOST_MAP_WO); 1306 1248 if (ret < 0) 1307 1249 return ret; 1308 1250 1309 - kaddr = kmap_atomic(iov.bv_page); 1310 - to = kaddr + iov.bv_offset; 1311 - WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val)); 1312 - kunmap_atomic(kaddr); 1251 + tmp = cpu_to_vringh16(vrh, val); 1252 + 1253 + if (vrh->use_va) { 1254 + ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); 1255 + if (ret) 1256 + return ret; 1257 + } else { 1258 + void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); 1259 + void *to = kaddr + ivec.iov.bvec[0].bv_offset; 1260 + 1261 + WRITE_ONCE(*(__virtio16 *)to, tmp); 1262 + kunmap_local(kaddr); 1263 + } 1313 1264 1314 1265 return 0; 1315 1266 } ··· 1387 1306 * @features: the feature bits for this ring. 1388 1307 * @num: the number of elements. 1389 1308 * @weak_barriers: true if we only need memory barriers, not I/O. 1390 - * @desc: the userpace descriptor pointer. 1391 - * @avail: the userpace avail pointer. 1392 - * @used: the userpace used pointer. 1309 + * @desc: the userspace descriptor pointer. 1310 + * @avail: the userspace avail pointer. 1311 + * @used: the userspace used pointer. 1393 1312 * 1394 1313 * Returns an error if num is invalid. 1395 1314 */ ··· 1399 1318 struct vring_avail *avail, 1400 1319 struct vring_used *used) 1401 1320 { 1321 + vrh->use_va = false; 1322 + 1402 1323 return vringh_init_kern(vrh, features, num, weak_barriers, 1403 1324 desc, avail, used); 1404 1325 } 1405 1326 EXPORT_SYMBOL(vringh_init_iotlb); 1327 + 1328 + /** 1329 + * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing 1330 + * user VA. 1331 + * @vrh: the vringh to initialize. 1332 + * @features: the feature bits for this ring. 
1333 + * @num: the number of elements. 1334 + * @weak_barriers: true if we only need memory barriers, not I/O. 1335 + * @desc: the userspace descriptor pointer. 1336 + * @avail: the userspace avail pointer. 1337 + * @used: the userspace used pointer. 1338 + * 1339 + * Returns an error if num is invalid. 1340 + */ 1341 + int vringh_init_iotlb_va(struct vringh *vrh, u64 features, 1342 + unsigned int num, bool weak_barriers, 1343 + struct vring_desc *desc, 1344 + struct vring_avail *avail, 1345 + struct vring_used *used) 1346 + { 1347 + vrh->use_va = true; 1348 + 1349 + return vringh_init_kern(vrh, features, num, weak_barriers, 1350 + desc, avail, used); 1351 + } 1352 + EXPORT_SYMBOL(vringh_init_iotlb_va); 1406 1353 1407 1354 /** 1408 1355 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.

+17 -1

drivers/virtio/virtio_mmio.c

··· 286 286 return true; 287 287 } 288 288 289 + static bool vm_notify_with_data(struct virtqueue *vq) 290 + { 291 + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); 292 + u32 data = vring_notification_data(vq); 293 + 294 + writel(data, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); 295 + 296 + return true; 297 + } 298 + 289 299 /* Notify all virtqueues on an interrupt. */ 290 300 static irqreturn_t vm_interrupt(int irq, void *opaque) 291 301 { ··· 374 364 const char *name, bool ctx) 375 365 { 376 366 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 367 + bool (*notify)(struct virtqueue *vq); 377 368 struct virtio_mmio_vq_info *info; 378 369 struct virtqueue *vq; 379 370 unsigned long flags; 380 371 unsigned int num; 381 372 int err; 373 + 374 + if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA)) 375 + notify = vm_notify_with_data; 376 + else 377 + notify = vm_notify; 382 378 383 379 if (!name) 384 380 return NULL; ··· 414 398 415 399 /* Create the vring */ 416 400 vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, 417 - true, true, ctx, vm_notify, callback, name); 401 + true, true, ctx, notify, callback, name); 418 402 if (!vq) { 419 403 err = -ENOMEM; 420 404 goto error_new_virtqueue;

+16 -6

drivers/virtio/virtio_pci_modern.c

··· 288 288 return vp_modern_config_vector(&vp_dev->mdev, vector); 289 289 } 290 290 291 + static bool vp_notify_with_data(struct virtqueue *vq) 292 + { 293 + u32 data = vring_notification_data(vq); 294 + 295 + iowrite32(data, (void __iomem *)vq->priv); 296 + 297 + return true; 298 + } 299 + 291 300 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 292 301 struct virtio_pci_vq_info *info, 293 302 unsigned int index, ··· 307 298 { 308 299 309 300 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 301 + bool (*notify)(struct virtqueue *vq); 310 302 struct virtqueue *vq; 311 303 u16 num; 312 304 int err; 305 + 306 + if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA)) 307 + notify = vp_notify_with_data; 308 + else 309 + notify = vp_notify; 313 310 314 311 if (index >= vp_modern_get_num_queues(mdev)) 315 312 return ERR_PTR(-EINVAL); ··· 325 310 if (!num || vp_modern_get_queue_enable(mdev, index)) 326 311 return ERR_PTR(-ENOENT); 327 312 328 - if (!is_power_of_2(num)) { 329 - dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); 330 - return ERR_PTR(-EINVAL); 331 - } 332 - 333 313 info->msix_vector = msix_vec; 334 314 335 315 /* create the vring */ 336 316 vq = vring_create_virtqueue(index, num, 337 317 SMP_CACHE_BYTES, &vp_dev->vdev, 338 318 true, true, ctx, 339 - vp_notify, callback, name); 319 + notify, callback, name); 340 320 if (!vq) 341 321 return ERR_PTR(-ENOMEM); 342 322

+59 -30

drivers/virtio/virtio_ring.c

··· 231 231 * Helpers. 232 232 */ 233 233 234 - #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 234 + #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) 235 235 236 - static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq, 237 - unsigned int total_sg) 236 + static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, 237 + unsigned int total_sg) 238 238 { 239 239 /* 240 240 * If the host supports indirect descriptor tables, and we have multiple ··· 269 269 * unconditionally on data path. 270 270 */ 271 271 272 - static bool vring_use_dma_api(struct virtio_device *vdev) 272 + static bool vring_use_dma_api(const struct virtio_device *vdev) 273 273 { 274 274 if (!virtio_has_dma_quirk(vdev)) 275 275 return true; ··· 289 289 return false; 290 290 } 291 291 292 - size_t virtio_max_dma_size(struct virtio_device *vdev) 292 + size_t virtio_max_dma_size(const struct virtio_device *vdev) 293 293 { 294 294 size_t max_segment_size = SIZE_MAX; 295 295 ··· 349 349 * making all of the arch DMA ops work on the vring device itself 350 350 * is a mess. 
351 351 */ 352 - static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 352 + static struct device *vring_dma_dev(const struct vring_virtqueue *vq) 353 353 { 354 354 return vq->dma_dev; 355 355 } ··· 423 423 */ 424 424 425 425 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 426 - struct vring_desc *desc) 426 + const struct vring_desc *desc) 427 427 { 428 428 u16 flags; 429 429 ··· 784 784 } 785 785 } 786 786 787 - static inline bool more_used_split(const struct vring_virtqueue *vq) 787 + static bool more_used_split(const struct vring_virtqueue *vq) 788 788 { 789 789 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 790 790 vq->split.vring.used->idx); ··· 854 854 855 855 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 856 856 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 857 + 858 + /* 859 + * If device triggered an event already it won't trigger one again: 860 + * no need to disable. 861 + */ 862 + if (vq->event_triggered) 863 + return; 864 + 857 865 if (vq->event) 858 866 /* TODO: this is a hack. Figure out a cleaner value to write. */ 859 867 vring_used_event(&vq->split.vring) = 0x0; ··· 1180 1172 /* 1181 1173 * Packed ring specific functions - *_packed(). 
1182 1174 */ 1183 - static inline bool packed_used_wrap_counter(u16 last_used_idx) 1175 + static bool packed_used_wrap_counter(u16 last_used_idx) 1184 1176 { 1185 1177 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1186 1178 } 1187 1179 1188 - static inline u16 packed_last_used(u16 last_used_idx) 1180 + static u16 packed_last_used(u16 last_used_idx) 1189 1181 { 1190 1182 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1191 1183 } 1192 1184 1193 1185 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1194 - struct vring_desc_extra *extra) 1186 + const struct vring_desc_extra *extra) 1195 1187 { 1196 1188 u16 flags; 1197 1189 ··· 1214 1206 } 1215 1207 1216 1208 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1217 - struct vring_packed_desc *desc) 1209 + const struct vring_packed_desc *desc) 1218 1210 { 1219 1211 u16 flags; 1220 1212 ··· 1620 1612 return avail == used && used == used_wrap_counter; 1621 1613 } 1622 1614 1623 - static inline bool more_used_packed(const struct vring_virtqueue *vq) 1615 + static bool more_used_packed(const struct vring_virtqueue *vq) 1624 1616 { 1625 1617 u16 last_used; 1626 1618 u16 last_used_idx; ··· 1707 1699 1708 1700 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1709 1701 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1702 + 1703 + /* 1704 + * If device triggered an event already it won't trigger one again: 1705 + * no need to disable. 1706 + */ 1707 + if (vq->event_triggered) 1708 + return; 1709 + 1710 1710 vq->packed.vring.driver->flags = 1711 1711 cpu_to_le16(vq->packed.event_flags_shadow); 1712 1712 } ··· 2346 2330 { 2347 2331 struct vring_virtqueue *vq = to_vvq(_vq); 2348 2332 2349 - /* If device triggered an event already it won't trigger one again: 2350 - * no need to disable. 
2351 - */ 2352 - if (vq->event_triggered) 2353 - return; 2354 - 2355 2333 if (vq->packed_ring) 2356 2334 virtqueue_disable_cb_packed(_vq); 2357 2335 else ··· 2762 2752 } 2763 2753 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2764 2754 2755 + u32 vring_notification_data(struct virtqueue *_vq) 2756 + { 2757 + struct vring_virtqueue *vq = to_vvq(_vq); 2758 + u16 next; 2759 + 2760 + if (vq->packed_ring) 2761 + next = (vq->packed.next_avail_idx & 2762 + ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 2763 + vq->packed.avail_wrap_counter << 2764 + VRING_PACKED_EVENT_F_WRAP_CTR; 2765 + else 2766 + next = vq->split.avail_idx_shadow; 2767 + 2768 + return next << 16 | _vq->index; 2769 + } 2770 + EXPORT_SYMBOL_GPL(vring_notification_data); 2771 + 2765 2772 /* Manipulates transport-specific feature bits. */ 2766 2773 void vring_transport_features(struct virtio_device *vdev) 2767 2774 { ··· 2798 2771 break; 2799 2772 case VIRTIO_F_ORDER_PLATFORM: 2800 2773 break; 2774 + case VIRTIO_F_NOTIFICATION_DATA: 2775 + break; 2801 2776 default: 2802 2777 /* We don't understand this bit. */ 2803 2778 __virtio_clear_bit(vdev, i); ··· 2815 2786 * Returns the size of the vring. This is mainly used for boasting to 2816 2787 * userspace. Unlike other operations, this need not be serialized. 2817 2788 */ 2818 - unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2789 + unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 2819 2790 { 2820 2791 2821 - struct vring_virtqueue *vq = to_vvq(_vq); 2792 + const struct vring_virtqueue *vq = to_vvq(_vq); 2822 2793 2823 2794 return vq->packed_ring ? 
vq->packed.vring.num : vq->split.vring.num; 2824 2795 } ··· 2848 2819 } 2849 2820 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 2850 2821 2851 - bool virtqueue_is_broken(struct virtqueue *_vq) 2822 + bool virtqueue_is_broken(const struct virtqueue *_vq) 2852 2823 { 2853 - struct vring_virtqueue *vq = to_vvq(_vq); 2824 + const struct vring_virtqueue *vq = to_vvq(_vq); 2854 2825 2855 2826 return READ_ONCE(vq->broken); 2856 2827 } ··· 2897 2868 } 2898 2869 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 2899 2870 2900 - dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2871 + dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) 2901 2872 { 2902 - struct vring_virtqueue *vq = to_vvq(_vq); 2873 + const struct vring_virtqueue *vq = to_vvq(_vq); 2903 2874 2904 2875 BUG_ON(!vq->we_own_ring); 2905 2876 ··· 2910 2881 } 2911 2882 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2912 2883 2913 - dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2884 + dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) 2914 2885 { 2915 - struct vring_virtqueue *vq = to_vvq(_vq); 2886 + const struct vring_virtqueue *vq = to_vvq(_vq); 2916 2887 2917 2888 BUG_ON(!vq->we_own_ring); 2918 2889 ··· 2924 2895 } 2925 2896 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2926 2897 2927 - dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2898 + dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) 2928 2899 { 2929 - struct vring_virtqueue *vq = to_vvq(_vq); 2900 + const struct vring_virtqueue *vq = to_vvq(_vq); 2930 2901 2931 2902 BUG_ON(!vq->we_own_ring); 2932 2903 ··· 2939 2910 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2940 2911 2941 2912 /* Only available for split ring */ 2942 - const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2913 + const struct vring *virtqueue_get_vring(const struct virtqueue *vq) 2943 2914 { 2944 2915 return &to_vvq(vq)->split.vring; 2945 2916 }

+118 -2

drivers/virtio/virtio_vdpa.c

··· 13 13 #include <linux/kernel.h> 14 14 #include <linux/slab.h> 15 15 #include <linux/uuid.h> 16 + #include <linux/group_cpus.h> 16 17 #include <linux/virtio.h> 17 18 #include <linux/vdpa.h> 18 19 #include <linux/virtio_config.h> ··· 113 112 return true; 114 113 } 115 114 115 + static bool virtio_vdpa_notify_with_data(struct virtqueue *vq) 116 + { 117 + struct vdpa_device *vdpa = vd_get_vdpa(vq->vdev); 118 + const struct vdpa_config_ops *ops = vdpa->config; 119 + u32 data = vring_notification_data(vq); 120 + 121 + ops->kick_vq_with_data(vdpa, data); 122 + 123 + return true; 124 + } 125 + 116 126 static irqreturn_t virtio_vdpa_config_cb(void *private) 117 127 { 118 128 struct virtio_vdpa_device *vd_dev = private; ··· 150 138 struct device *dma_dev; 151 139 const struct vdpa_config_ops *ops = vdpa->config; 152 140 struct virtio_vdpa_vq_info *info; 141 + bool (*notify)(struct virtqueue *vq) = virtio_vdpa_notify; 153 142 struct vdpa_callback cb; 154 143 struct virtqueue *vq; 155 144 u64 desc_addr, driver_addr, device_addr; ··· 166 153 167 154 if (index >= vdpa->nvqs) 168 155 return ERR_PTR(-ENOENT); 156 + 157 + /* We cannot accept VIRTIO_F_NOTIFICATION_DATA without kick_vq_with_data */ 158 + if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA)) { 159 + if (ops->kick_vq_with_data) 160 + notify = virtio_vdpa_notify_with_data; 161 + else 162 + __virtio_clear_bit(vdev, VIRTIO_F_NOTIFICATION_DATA); 163 + } 169 164 170 165 /* Queue shouldn't already be set up. */ 171 166 if (ops->get_vq_ready(vdpa, index)) ··· 204 183 dma_dev = vdpa_get_dma_dev(vdpa); 205 184 vq = vring_create_virtqueue_dma(index, max_num, align, vdev, 206 185 true, may_reduce_num, ctx, 207 - virtio_vdpa_notify, callback, 208 - name, dma_dev); 186 + notify, callback, name, dma_dev); 209 187 if (!vq) { 210 188 err = -ENOMEM; 211 189 goto error_new_virtqueue; ··· 215 195 /* Setup virtqueue callback */ 216 196 cb.callback = callback ? 
virtio_vdpa_virtqueue_cb : NULL; 217 197 cb.private = info; 198 + cb.trigger = NULL; 218 199 ops->set_vq_cb(vdpa, index, &cb); 219 200 ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); 220 201 ··· 293 272 virtio_vdpa_del_vq(vq); 294 273 } 295 274 275 + static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) 276 + { 277 + affd->nr_sets = 1; 278 + affd->set_size[0] = affvecs; 279 + } 280 + 281 + static struct cpumask * 282 + create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) 283 + { 284 + unsigned int affvecs = 0, curvec, usedvecs, i; 285 + struct cpumask *masks = NULL; 286 + 287 + if (nvecs > affd->pre_vectors + affd->post_vectors) 288 + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; 289 + 290 + if (!affd->calc_sets) 291 + affd->calc_sets = default_calc_sets; 292 + 293 + affd->calc_sets(affd, affvecs); 294 + 295 + if (!affvecs) 296 + return NULL; 297 + 298 + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); 299 + if (!masks) 300 + return NULL; 301 + 302 + /* Fill out vectors at the beginning that don't need affinity */ 303 + for (curvec = 0; curvec < affd->pre_vectors; curvec++) 304 + cpumask_setall(&masks[curvec]); 305 + 306 + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { 307 + unsigned int this_vecs = affd->set_size[i]; 308 + int j; 309 + struct cpumask *result = group_cpus_evenly(this_vecs); 310 + 311 + if (!result) { 312 + kfree(masks); 313 + return NULL; 314 + } 315 + 316 + for (j = 0; j < this_vecs; j++) 317 + cpumask_copy(&masks[curvec + j], &result[j]); 318 + kfree(result); 319 + 320 + curvec += this_vecs; 321 + usedvecs += this_vecs; 322 + } 323 + 324 + /* Fill out vectors at the end that don't need affinity */ 325 + if (usedvecs >= affvecs) 326 + curvec = affd->pre_vectors + affvecs; 327 + else 328 + curvec = affd->pre_vectors + usedvecs; 329 + for (; curvec < nvecs; curvec++) 330 + cpumask_setall(&masks[curvec]); 331 + 332 + return masks; 333 + } 334 + 296 335 static int 
virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 297 336 struct virtqueue *vqs[], 298 337 vq_callback_t *callbacks[], ··· 363 282 struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); 364 283 struct vdpa_device *vdpa = vd_get_vdpa(vdev); 365 284 const struct vdpa_config_ops *ops = vdpa->config; 285 + struct irq_affinity default_affd = { 0 }; 286 + struct cpumask *masks; 366 287 struct vdpa_callback cb; 367 288 int i, err, queue_idx = 0; 289 + 290 + masks = create_affinity_masks(nvqs, desc ? desc : &default_affd); 291 + if (!masks) 292 + return -ENOMEM; 368 293 369 294 for (i = 0; i < nvqs; ++i) { 370 295 if (!names[i]) { ··· 385 298 err = PTR_ERR(vqs[i]); 386 299 goto err_setup_vq; 387 300 } 301 + ops->set_vq_affinity(vdpa, i, &masks[i]); 388 302 } 389 303 390 304 cb.callback = virtio_vdpa_config_cb; ··· 425 337 return dev_name(&vdpa->dev); 426 338 } 427 339 340 + static int virtio_vdpa_set_vq_affinity(struct virtqueue *vq, 341 + const struct cpumask *cpu_mask) 342 + { 343 + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev); 344 + struct vdpa_device *vdpa = vd_dev->vdpa; 345 + const struct vdpa_config_ops *ops = vdpa->config; 346 + unsigned int index = vq->index; 347 + 348 + if (ops->set_vq_affinity) 349 + return ops->set_vq_affinity(vdpa, index, cpu_mask); 350 + 351 + return 0; 352 + } 353 + 354 + static const struct cpumask * 355 + virtio_vdpa_get_vq_affinity(struct virtio_device *vdev, int index) 356 + { 357 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 358 + const struct vdpa_config_ops *ops = vdpa->config; 359 + 360 + if (ops->get_vq_affinity) 361 + return ops->get_vq_affinity(vdpa, index); 362 + 363 + return NULL; 364 + } 365 + 428 366 static const struct virtio_config_ops virtio_vdpa_config_ops = { 429 367 .get = virtio_vdpa_get, 430 368 .set = virtio_vdpa_set, ··· 463 349 .get_features = virtio_vdpa_get_features, 464 350 .finalize_features = virtio_vdpa_finalize_features, 465 351 .bus_name = 
virtio_vdpa_bus_name, 352 + .set_vq_affinity = virtio_vdpa_set_vq_affinity, 353 + .get_vq_affinity = virtio_vdpa_get_vq_affinity, 466 354 }; 467 355 468 356 static void virtio_vdpa_release_dev(struct device *_d)

+49 -3

include/linux/vdpa.h

··· 10 10 #include <linux/if_ether.h> 11 11 12 12 /** 13 - * struct vdpa_calllback - vDPA callback definition. 13 + * struct vdpa_callback - vDPA callback definition. 14 14 * @callback: interrupt callback function 15 15 * @private: the data passed to the callback function 16 + * @trigger: the eventfd for the callback (Optional). 17 + * When it is set, the vDPA driver must guarantee that 18 + * signaling it is functionally equivalent to triggering 19 + * the callback. Then vDPA parent can signal it directly 20 + * instead of triggering the callback. 16 21 */ 17 22 struct vdpa_callback { 18 23 irqreturn_t (*callback)(void *data); 19 24 void *private; 25 + struct eventfd_ctx *trigger; 20 26 }; 21 27 22 28 /** ··· 120 114 }; 121 115 122 116 /** 123 - * Corresponding file area for device memory mapping 117 + * struct vdpa_map_file - file area for device memory mapping 124 118 * @file: vma->vm_file for the mapping 125 119 * @offset: mapping offset in the vm_file 126 120 */ ··· 149 143 * @kick_vq: Kick the virtqueue 150 144 * @vdev: vdpa device 151 145 * @idx: virtqueue index 146 + * @kick_vq_with_data: Kick the virtqueue and supply extra data 147 + * (only if VIRTIO_F_NOTIFICATION_DATA is negotiated) 148 + * @vdev: vdpa device 149 + * @data for split virtqueue: 150 + * 16 bits vqn and 16 bits next available index. 151 + * @data for packed virtqueue: 152 + * 16 bits vqn, 15 least significant bits of 153 + * next available index and 1 bit next_wrap. 152 154 * @set_vq_cb: Set the interrupt callback function for 153 155 * a virtqueue 154 156 * @vdev: vdpa device ··· 179 165 * @vdev: vdpa device 180 166 * @idx: virtqueue index 181 167 * @state: pointer to returned state (last_avail_idx) 168 + * @get_vendor_vq_stats: Get the vendor statistics of a device. 
169 + * @vdev: vdpa device 170 + * @idx: virtqueue index 171 + * @msg: socket buffer holding stats message 172 + * @extack: extack for reporting error messages 173 + * Returns integer: success (0) or error (< 0) 182 174 * @get_vq_notification: Get the notification area for a virtqueue (optional) 183 175 * @vdev: vdpa device 184 176 * @idx: virtqueue index 185 - * Returns the notifcation area 177 + * Returns the notification area 186 178 * @get_vq_irq: Get the irq number of a virtqueue (optional, 187 179 * but must implemented if require vq irq offloading) 188 180 * @vdev: vdpa device ··· 270 250 * @vdev: vdpa device 271 251 * Returns the iova range supported by 272 252 * the device. 253 + * @set_vq_affinity: Set the affinity of virtqueue (optional) 254 + * @vdev: vdpa device 255 + * @idx: virtqueue index 256 + * @cpu_mask: the affinity mask 257 + * Returns integer: success (0) or error (< 0) 258 + * @get_vq_affinity: Get the affinity of virtqueue (optional) 259 + * @vdev: vdpa device 260 + * @idx: virtqueue index 261 + * Returns the affinity mask 273 262 * @set_group_asid: Set address space identifier for a 274 263 * virtqueue group (optional) 275 264 * @vdev: vdpa device ··· 319 290 * @vdev: vdpa device 320 291 * @idx: virtqueue index 321 292 * Returns pointer to structure device or error (NULL) 293 + * @bind_mm: Bind the device to a specific address space 294 + * so the vDPA framework can use VA when this 295 + * callback is implemented. (optional) 296 + * @vdev: vdpa device 297 + * @mm: address space to bind 298 + * @unbind_mm: Unbind the device from the address space 299 + * bound using the bind_mm callback. 
(optional) 300 + * @vdev: vdpa device 322 301 * @free: Free resources that belongs to vDPA (optional) 323 302 * @vdev: vdpa device 324 303 */ ··· 337 300 u64 device_area); 338 301 void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num); 339 302 void (*kick_vq)(struct vdpa_device *vdev, u16 idx); 303 + void (*kick_vq_with_data)(struct vdpa_device *vdev, u32 data); 340 304 void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx, 341 305 struct vdpa_callback *cb); 342 306 void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready); ··· 378 340 const void *buf, unsigned int len); 379 341 u32 (*get_generation)(struct vdpa_device *vdev); 380 342 struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); 343 + int (*set_vq_affinity)(struct vdpa_device *vdev, u16 idx, 344 + const struct cpumask *cpu_mask); 345 + const struct cpumask *(*get_vq_affinity)(struct vdpa_device *vdev, 346 + u16 idx); 381 347 382 348 /* DMA ops */ 383 349 int (*set_map)(struct vdpa_device *vdev, unsigned int asid, ··· 393 351 int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, 394 352 unsigned int asid); 395 353 struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); 354 + int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); 355 + void (*unbind_mm)(struct vdpa_device *vdev); 396 356 397 357 /* Free device resources */ 398 358 void (*free)(struct vdpa_device *vdev); ··· 550 506 * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that 551 507 * management device support during dev_add callback 552 508 * @list: list entry 509 + * @supported_features: features supported by device 510 + * @max_supported_vqs: maximum number of virtqueues supported by device 553 511 */ 554 512 struct vdpa_mgmt_dev { 555 513 struct device *device;

+8 -8

include/linux/virtio.h

··· 34 34 unsigned int index; 35 35 unsigned int num_free; 36 36 unsigned int num_max; 37 - void *priv; 38 37 bool reset; 38 + void *priv; 39 39 }; 40 40 41 41 int virtqueue_add_outbuf(struct virtqueue *vq, ··· 84 84 85 85 void *virtqueue_detach_unused_buf(struct virtqueue *vq); 86 86 87 - unsigned int virtqueue_get_vring_size(struct virtqueue *vq); 87 + unsigned int virtqueue_get_vring_size(const struct virtqueue *vq); 88 88 89 - bool virtqueue_is_broken(struct virtqueue *vq); 89 + bool virtqueue_is_broken(const struct virtqueue *vq); 90 90 91 - const struct vring *virtqueue_get_vring(struct virtqueue *vq); 92 - dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); 93 - dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); 94 - dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); 91 + const struct vring *virtqueue_get_vring(const struct virtqueue *vq); 92 + dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *vq); 93 + dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *vq); 94 + dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); 95 95 96 96 int virtqueue_resize(struct virtqueue *vq, u32 num, 97 97 void (*recycle)(struct virtqueue *vq, void *buf)); ··· 147 147 #endif 148 148 void virtio_reset_device(struct virtio_device *dev); 149 149 150 - size_t virtio_max_dma_size(struct virtio_device *vdev); 150 + size_t virtio_max_dma_size(const struct virtio_device *vdev); 151 151 152 152 #define virtio_device_for_each_vq(vdev, vq) \ 153 153 list_for_each_entry(vq, &vdev->vqs, list)

+3

include/linux/virtio_ring.h

··· 58 58 59 59 struct virtio_device; 60 60 struct virtqueue; 61 + struct device; 61 62 62 63 /* 63 64 * Creates a virtqueue and allocates the descriptor ring. If ··· 118 117 void vring_transport_features(struct virtio_device *vdev); 119 118 120 119 irqreturn_t vring_interrupt(int irq, void *_vq); 120 + 121 + u32 vring_notification_data(struct virtqueue *_vq); 121 122 #endif /* _LINUX_VIRTIO_RING_H */

+24 -2

include/linux/vringh.h

··· 32 32 /* Can we get away with weak barriers? */ 33 33 bool weak_barriers; 34 34 35 + /* Use user's VA */ 36 + bool use_va; 37 + 35 38 /* Last available index we saw (ie. where we're up to). */ 36 39 u16 last_avail_idx; 37 40 ··· 57 54 void (*notify)(struct vringh *); 58 55 }; 59 56 57 + struct virtio_device; 58 + typedef void vrh_callback_t(struct virtio_device *, struct vringh *); 59 + 60 60 /** 61 61 * struct vringh_config_ops - ops for creating a host vring from a virtio driver 62 62 * @find_vrhs: find the host vrings and instantiate them ··· 71 65 * Returns 0 on success or error status 72 66 * @del_vrhs: free the host vrings found by find_vrhs(). 73 67 */ 74 - struct virtio_device; 75 - typedef void vrh_callback_t(struct virtio_device *, struct vringh *); 76 68 struct vringh_config_ops { 77 69 int (*find_vrhs)(struct virtio_device *vdev, unsigned nhvrs, 78 70 struct vringh *vrhs[], vrh_callback_t *callbacks[]); ··· 85 81 86 82 /** 87 83 * struct vringh_iov - iovec mangler. 84 + * @iov: array of iovecs to operate on 85 + * @consumed: number of bytes consumed within iov[i] 86 + * @i: index of current iovec 87 + * @used: number of iovecs present in @iov 88 + * @max_num: maximum number of iovecs. 89 + * corresponds to allocated memory of @iov 88 90 * 89 91 * Mangles iovec in place, and restores it. 90 92 * Remaining data is iov + i, of used - i elements. ··· 103 93 104 94 /** 105 95 * struct vringh_kiov - kvec mangler. 96 + * @iov: array of iovecs to operate on 97 + * @consumed: number of bytes consumed within iov[i] 98 + * @i: index of current iovec 99 + * @used: number of iovecs present in @iov 100 + * @max_num: maximum number of iovecs. 101 + * corresponds to allocated memory of @iov 106 102 * 107 103 * Mangles kvec in place, and restores it. 108 104 * Remaining data is iov + i, of used - i elements. 
··· 299 283 struct vring_desc *desc, 300 284 struct vring_avail *avail, 301 285 struct vring_used *used); 286 + 287 + int vringh_init_iotlb_va(struct vringh *vrh, u64 features, 288 + unsigned int num, bool weak_barriers, 289 + struct vring_desc *desc, 290 + struct vring_avail *avail, 291 + struct vring_used *used); 302 292 303 293 int vringh_getdesc_iotlb(struct vringh *vrh, 304 294 struct vringh_kiov *riov,

+6

include/uapi/linux/virtio_config.h

··· 100 100 #define VIRTIO_F_SR_IOV 37 101 101 102 102 /* 103 + * This feature indicates that the driver passes extra data (besides 104 + * identifying the virtqueue) in its device notifications. 105 + */ 106 + #define VIRTIO_F_NOTIFICATION_DATA 38 107 + 108 + /* 103 109 * This feature indicates that the driver can reset a queue individually. 104 110 */ 105 111 #define VIRTIO_F_RING_RESET 40

+1

lib/group_cpus.c

··· 426 426 return masks; 427 427 } 428 428 #endif /* CONFIG_SMP */ 429 + EXPORT_SYMBOL_GPL(group_cpus_evenly);

+5

tools/include/linux/types.h

··· 49 49 #endif 50 50 51 51 #define __force 52 + /* This is defined in linux/compiler_types.h and is left for backward 53 + * compatibility. 54 + */ 55 + #ifndef __user 52 56 #define __user 57 + #endif 53 58 #define __must_check 54 59 #define __cold 55 60

+2

tools/virtio/linux/compiler.h

··· 2 2 #ifndef LINUX_COMPILER_H 3 3 #define LINUX_COMPILER_H 4 4 5 + #include "../../../include/linux/compiler_types.h" 6 + 5 7 #define WRITE_ONCE(var, val) \ 6 8 (*((volatile typeof(val) *)(&(var))) = (val)) 7 9

+1 -4

tools/virtio/linux/kernel.h

··· 10 10 #include <stdarg.h> 11 11 12 12 #include <linux/compiler.h> 13 + #include "../../../include/linux/container_of.h" 13 14 #include <linux/log2.h> 14 15 #include <linux/types.h> 15 16 #include <linux/overflow.h> ··· 107 106 { 108 107 free((void *)addr); 109 108 } 110 - 111 - #define container_of(ptr, type, member) ({ \ 112 - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 113 - (type *)( (char *)__mptr - offsetof(type,member) );}) 114 109 115 110 # ifndef likely 116 111 # define likely(x) (__builtin_expect(!!(x), 1))

+2 -9

tools/virtio/linux/uaccess.h

··· 6 6 7 7 extern void *__user_addr_min, *__user_addr_max; 8 8 9 - static inline void __chk_user_ptr(const volatile void *p, size_t size) 10 - { 11 - assert(p >= __user_addr_min && p + size <= __user_addr_max); 12 - } 13 - 14 9 #define put_user(x, ptr) \ 15 10 ({ \ 16 11 typeof(ptr) __pu_ptr = (ptr); \ 17 - __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ 12 + __chk_user_ptr(__pu_ptr); \ 18 13 WRITE_ONCE(*(__pu_ptr), x); \ 19 14 0; \ 20 15 }) ··· 17 22 #define get_user(x, ptr) \ 18 23 ({ \ 19 24 typeof(ptr) __pu_ptr = (ptr); \ 20 - __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ 25 + __chk_user_ptr(__pu_ptr); \ 21 26 x = READ_ONCE(*(__pu_ptr)); \ 22 27 0; \ 23 28 }) ··· 32 37 static inline int copy_from_user(void *to, const void __user volatile *from, 33 38 unsigned long n) 34 39 { 35 - __chk_user_ptr(from, n); 36 40 volatile_memcpy(to, from, n); 37 41 return 0; 38 42 } ··· 39 45 static inline int copy_to_user(void __user volatile *to, const void *from, 40 46 unsigned long n) 41 47 { 42 - __chk_user_ptr(to, n); 43 48 volatile_memcpy(to, from, n); 44 49 return 0; 45 50 }

+6 -6

tools/virtio/virtio_test.c

··· 134 134 dev->buf_size = 1024; 135 135 dev->buf = malloc(dev->buf_size); 136 136 assert(dev->buf); 137 - dev->control = open("/dev/vhost-test", O_RDWR); 137 + dev->control = open("/dev/vhost-test", O_RDWR); 138 138 assert(dev->control >= 0); 139 139 r = ioctl(dev->control, VHOST_SET_OWNER, NULL); 140 140 assert(r >= 0); ··· 327 327 } 328 328 }; 329 329 330 - static void help(void) 330 + static void help(int status) 331 331 { 332 332 fprintf(stderr, "Usage: virtio_test [--help]" 333 333 " [--no-indirect]" ··· 337 337 " [--batch=random/N]" 338 338 " [--reset=N]" 339 339 "\n"); 340 + 341 + exit(status); 340 342 } 341 343 342 344 int main(int argc, char **argv) ··· 356 354 case -1: 357 355 goto done; 358 356 case '?': 359 - help(); 360 - exit(2); 357 + help(2); 361 358 case 'e': 362 359 features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); 363 360 break; 364 361 case 'h': 365 - help(); 366 - goto done; 362 + help(0); 367 363 case 'i': 368 364 features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); 369 365 break;