Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

- Some bug fixes

- The new vdpa subsystem with two first drivers

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
virtio-balloon: Revert "virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM"
vdpa: move to drivers/vdpa
virtio: Intel IFC VF driver for VDPA
vdpasim: vDPA device simulator
vhost: introduce vDPA-based backend
virtio: introduce a vDPA based transport
vDPA: introduce vDPA bus
vringh: IOTLB support
vhost: factor out IOTLB
vhost: allow per device message handler
vhost: refine vhost and vringh kconfig
virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM
virtio-net: Introduce hash report feature
virtio-net: Introduce RSS receive steering feature
virtio-net: Introduce extended RSC feature
tools/virtio: option to build an out of tree module

+4310 -249
+3
MAINTAINERS
··· 17870 17870 S: Maintained 17871 17871 F: Documentation/devicetree/bindings/virtio/ 17872 17872 F: drivers/virtio/ 17873 + F: drivers/vdpa/ 17873 17874 F: tools/virtio/ 17874 17875 F: drivers/net/virtio_net.c 17875 17876 F: drivers/block/virtio_blk.c 17876 17877 F: include/linux/virtio*.h 17878 + F: include/linux/vdpa.h 17877 17879 F: include/uapi/linux/virtio_*.h 17878 17880 F: drivers/crypto/virtio/ 17879 17881 F: mm/balloon_compaction.c ··· 17943 17941 S: Maintained 17944 17942 F: drivers/vhost/ 17945 17943 F: include/uapi/linux/vhost.h 17944 + F: include/linux/vhost_iotlb.h 17946 17945 17947 17946 VIRTIO INPUT DRIVER 17948 17947 M: Gerd Hoffmann <kraxel@redhat.com>
-2
arch/arm64/kvm/Kconfig
··· 64 64 config KVM_INDIRECT_VECTORS 65 65 def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) 66 66 67 - source "drivers/vhost/Kconfig" 68 - 69 67 endif # VIRTUALIZATION
-2
arch/mips/kvm/Kconfig
··· 72 72 73 73 If unsure, say N. 74 74 75 - source "drivers/vhost/Kconfig" 76 - 77 75 endif # VIRTUALIZATION
-2
arch/powerpc/kvm/Kconfig
··· 204 204 default y 205 205 depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE 206 206 207 - source "drivers/vhost/Kconfig" 208 - 209 207 endif # VIRTUALIZATION
-4
arch/s390/kvm/Kconfig
··· 55 55 56 56 If unsure, say N. 57 57 58 - # OK, it's a little counter-intuitive to do this, but it puts it neatly under 59 - # the virtualization menu. 60 - source "drivers/vhost/Kconfig" 61 - 62 58 endif # VIRTUALIZATION
-4
arch/x86/kvm/Kconfig
··· 107 107 This option adds a R/W kVM module parameter 'mmu_audit', which allows 108 108 auditing of KVM MMU events at runtime. 109 109 110 - # OK, it's a little counter-intuitive to do this, but it puts it neatly under 111 - # the virtualization menu. 112 - source "drivers/vhost/Kconfig" 113 - 114 110 endif # VIRTUALIZATION
+4
drivers/Kconfig
··· 138 138 139 139 source "drivers/virtio/Kconfig" 140 140 141 + source "drivers/vdpa/Kconfig" 142 + 143 + source "drivers/vhost/Kconfig" 144 + 141 145 source "drivers/hv/Kconfig" 142 146 143 147 source "drivers/xen/Kconfig"
+1
drivers/Makefile
··· 42 42 obj-y += soc/ 43 43 44 44 obj-$(CONFIG_VIRTIO) += virtio/ 45 + obj-$(CONFIG_VDPA) += vdpa/ 45 46 obj-$(CONFIG_XEN) += xen/ 46 47 47 48 # regulators early, since some subsystems rely on them to initialize
-4
drivers/misc/mic/Kconfig
··· 133 133 OS and tools for MIC to use with this driver are available from 134 134 <http://software.intel.com/en-us/mic-developer>. 135 135 136 - if VOP 137 - source "drivers/vhost/Kconfig.vringh" 138 - endif 139 - 140 136 endmenu
-4
drivers/net/caif/Kconfig
··· 58 58 ---help--- 59 59 The CAIF driver for CAIF over Virtio. 60 60 61 - if CAIF_VIRTIO 62 - source "drivers/vhost/Kconfig.vringh" 63 - endif 64 - 65 61 endif # CAIF_DRIVERS
+37
drivers/vdpa/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + config VDPA 3 + tristate 4 + help 5 + Enable this module to support vDPA device that uses a 6 + datapath which complies with virtio specifications with 7 + vendor specific control path. 8 + 9 + menuconfig VDPA_MENU 10 + bool "VDPA drivers" 11 + default n 12 + 13 + if VDPA_MENU 14 + 15 + config VDPA_SIM 16 + tristate "vDPA device simulator" 17 + depends on RUNTIME_TESTING_MENU 18 + select VDPA 19 + select VHOST_RING 20 + default n 21 + help 22 + vDPA networking device simulator which loop TX traffic back 23 + to RX. This device is used for testing, prototyping and 24 + development of vDPA. 25 + 26 + config IFCVF 27 + tristate "Intel IFC VF VDPA driver" 28 + depends on PCI_MSI 29 + select VDPA 30 + default n 31 + help 32 + This kernel module can drive Intel IFC VF NIC to offload 33 + virtio dataplane traffic to hardware. 34 + To compile this driver as a module, choose M here: the module will 35 + be called ifcvf. 36 + 37 + endif # VDPA_MENU
+4
drivers/vdpa/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + obj-$(CONFIG_VDPA) += vdpa.o 3 + obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ 4 + obj-$(CONFIG_IFCVF) += ifcvf/
+3
drivers/vdpa/ifcvf/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + obj-$(CONFIG_IFCVF) += ifcvf.o 3 + ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o
+389
drivers/vdpa/ifcvf/ifcvf_base.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Intel IFC VF NIC driver for virtio dataplane offloading 4 + * 5 + * Copyright (C) 2020 Intel Corporation. 6 + * 7 + * Author: Zhu Lingshan <lingshan.zhu@intel.com> 8 + * 9 + */ 10 + 11 + #include "ifcvf_base.h" 12 + 13 + static inline u8 ifc_ioread8(u8 __iomem *addr) 14 + { 15 + return ioread8(addr); 16 + } 17 + static inline u16 ifc_ioread16 (__le16 __iomem *addr) 18 + { 19 + return ioread16(addr); 20 + } 21 + 22 + static inline u32 ifc_ioread32(__le32 __iomem *addr) 23 + { 24 + return ioread32(addr); 25 + } 26 + 27 + static inline void ifc_iowrite8(u8 value, u8 __iomem *addr) 28 + { 29 + iowrite8(value, addr); 30 + } 31 + 32 + static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr) 33 + { 34 + iowrite16(value, addr); 35 + } 36 + 37 + static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr) 38 + { 39 + iowrite32(value, addr); 40 + } 41 + 42 + static void ifc_iowrite64_twopart(u64 val, 43 + __le32 __iomem *lo, __le32 __iomem *hi) 44 + { 45 + ifc_iowrite32((u32)val, lo); 46 + ifc_iowrite32(val >> 32, hi); 47 + } 48 + 49 + struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) 50 + { 51 + return container_of(hw, struct ifcvf_adapter, vf); 52 + } 53 + 54 + static void __iomem *get_cap_addr(struct ifcvf_hw *hw, 55 + struct virtio_pci_cap *cap) 56 + { 57 + struct ifcvf_adapter *ifcvf; 58 + struct pci_dev *pdev; 59 + u32 length, offset; 60 + u8 bar; 61 + 62 + length = le32_to_cpu(cap->length); 63 + offset = le32_to_cpu(cap->offset); 64 + bar = cap->bar; 65 + 66 + ifcvf= vf_to_adapter(hw); 67 + pdev = ifcvf->pdev; 68 + 69 + if (bar >= IFCVF_PCI_MAX_RESOURCE) { 70 + IFCVF_DBG(pdev, 71 + "Invalid bar number %u to get capabilities\n", bar); 72 + return NULL; 73 + } 74 + 75 + if (offset + length > pci_resource_len(pdev, bar)) { 76 + IFCVF_DBG(pdev, 77 + "offset(%u) + len(%u) overflows bar%u's capability\n", 78 + offset, length, bar); 79 + return NULL; 80 + } 81 + 82 + return hw->base[bar] + 
offset; 83 + } 84 + 85 + static int ifcvf_read_config_range(struct pci_dev *dev, 86 + uint32_t *val, int size, int where) 87 + { 88 + int ret, i; 89 + 90 + for (i = 0; i < size; i += 4) { 91 + ret = pci_read_config_dword(dev, where + i, val + i / 4); 92 + if (ret < 0) 93 + return ret; 94 + } 95 + 96 + return 0; 97 + } 98 + 99 + int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) 100 + { 101 + struct virtio_pci_cap cap; 102 + u16 notify_off; 103 + int ret; 104 + u8 pos; 105 + u32 i; 106 + 107 + ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); 108 + if (ret < 0) { 109 + IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); 110 + return -EIO; 111 + } 112 + 113 + while (pos) { 114 + ret = ifcvf_read_config_range(pdev, (u32 *)&cap, 115 + sizeof(cap), pos); 116 + if (ret < 0) { 117 + IFCVF_ERR(pdev, 118 + "Failed to get PCI capability at %x\n", pos); 119 + break; 120 + } 121 + 122 + if (cap.cap_vndr != PCI_CAP_ID_VNDR) 123 + goto next; 124 + 125 + switch (cap.cfg_type) { 126 + case VIRTIO_PCI_CAP_COMMON_CFG: 127 + hw->common_cfg = get_cap_addr(hw, &cap); 128 + IFCVF_DBG(pdev, "hw->common_cfg = %p\n", 129 + hw->common_cfg); 130 + break; 131 + case VIRTIO_PCI_CAP_NOTIFY_CFG: 132 + pci_read_config_dword(pdev, pos + sizeof(cap), 133 + &hw->notify_off_multiplier); 134 + hw->notify_bar = cap.bar; 135 + hw->notify_base = get_cap_addr(hw, &cap); 136 + IFCVF_DBG(pdev, "hw->notify_base = %p\n", 137 + hw->notify_base); 138 + break; 139 + case VIRTIO_PCI_CAP_ISR_CFG: 140 + hw->isr = get_cap_addr(hw, &cap); 141 + IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr); 142 + break; 143 + case VIRTIO_PCI_CAP_DEVICE_CFG: 144 + hw->net_cfg = get_cap_addr(hw, &cap); 145 + IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg); 146 + break; 147 + } 148 + 149 + next: 150 + pos = cap.cap_next; 151 + } 152 + 153 + if (hw->common_cfg == NULL || hw->notify_base == NULL || 154 + hw->isr == NULL || hw->net_cfg == NULL) { 155 + IFCVF_ERR(pdev, "Incomplete PCI capabilities\n"); 156 + return 
-EIO; 157 + } 158 + 159 + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { 160 + ifc_iowrite16(i, &hw->common_cfg->queue_select); 161 + notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); 162 + hw->vring[i].notify_addr = hw->notify_base + 163 + notify_off * hw->notify_off_multiplier; 164 + } 165 + 166 + hw->lm_cfg = hw->base[IFCVF_LM_BAR]; 167 + 168 + IFCVF_DBG(pdev, 169 + "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n", 170 + hw->common_cfg, hw->notify_base, hw->isr, 171 + hw->net_cfg, hw->notify_off_multiplier); 172 + 173 + return 0; 174 + } 175 + 176 + u8 ifcvf_get_status(struct ifcvf_hw *hw) 177 + { 178 + return ifc_ioread8(&hw->common_cfg->device_status); 179 + } 180 + 181 + void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) 182 + { 183 + ifc_iowrite8(status, &hw->common_cfg->device_status); 184 + } 185 + 186 + void ifcvf_reset(struct ifcvf_hw *hw) 187 + { 188 + ifcvf_set_status(hw, 0); 189 + /* flush set_status, make sure VF is stopped, reset */ 190 + ifcvf_get_status(hw); 191 + } 192 + 193 + static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) 194 + { 195 + if (status != 0) 196 + status |= ifcvf_get_status(hw); 197 + 198 + ifcvf_set_status(hw, status); 199 + ifcvf_get_status(hw); 200 + } 201 + 202 + u64 ifcvf_get_features(struct ifcvf_hw *hw) 203 + { 204 + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; 205 + u32 features_lo, features_hi; 206 + 207 + ifc_iowrite32(0, &cfg->device_feature_select); 208 + features_lo = ifc_ioread32(&cfg->device_feature); 209 + 210 + ifc_iowrite32(1, &cfg->device_feature_select); 211 + features_hi = ifc_ioread32(&cfg->device_feature); 212 + 213 + return ((u64)features_hi << 32) | features_lo; 214 + } 215 + 216 + void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, 217 + void *dst, int length) 218 + { 219 + u8 old_gen, new_gen, *p; 220 + int i; 221 + 222 + WARN_ON(offset + length > sizeof(struct virtio_net_config)); 223 + do { 224 
+ old_gen = ifc_ioread8(&hw->common_cfg->config_generation); 225 + p = dst; 226 + for (i = 0; i < length; i++) 227 + *p++ = ifc_ioread8(hw->net_cfg + offset + i); 228 + 229 + new_gen = ifc_ioread8(&hw->common_cfg->config_generation); 230 + } while (old_gen != new_gen); 231 + } 232 + 233 + void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, 234 + const void *src, int length) 235 + { 236 + const u8 *p; 237 + int i; 238 + 239 + p = src; 240 + WARN_ON(offset + length > sizeof(struct virtio_net_config)); 241 + for (i = 0; i < length; i++) 242 + ifc_iowrite8(*p++, hw->net_cfg + offset + i); 243 + } 244 + 245 + static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) 246 + { 247 + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; 248 + 249 + ifc_iowrite32(0, &cfg->guest_feature_select); 250 + ifc_iowrite32((u32)features, &cfg->guest_feature); 251 + 252 + ifc_iowrite32(1, &cfg->guest_feature_select); 253 + ifc_iowrite32(features >> 32, &cfg->guest_feature); 254 + } 255 + 256 + static int ifcvf_config_features(struct ifcvf_hw *hw) 257 + { 258 + struct ifcvf_adapter *ifcvf; 259 + 260 + ifcvf = vf_to_adapter(hw); 261 + ifcvf_set_features(hw, hw->req_features); 262 + ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK); 263 + 264 + if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) { 265 + IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n"); 266 + return -EIO; 267 + } 268 + 269 + return 0; 270 + } 271 + 272 + u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) 273 + { 274 + struct ifcvf_lm_cfg __iomem *ifcvf_lm; 275 + void __iomem *avail_idx_addr; 276 + u16 last_avail_idx; 277 + u32 q_pair_id; 278 + 279 + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; 280 + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); 281 + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; 282 + last_avail_idx = ifc_ioread16(avail_idx_addr); 283 + 284 + return last_avail_idx; 285 + } 286 + 287 + int ifcvf_set_vq_state(struct ifcvf_hw *hw, 
u16 qid, u64 num) 288 + { 289 + struct ifcvf_lm_cfg __iomem *ifcvf_lm; 290 + void __iomem *avail_idx_addr; 291 + u32 q_pair_id; 292 + 293 + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; 294 + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); 295 + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; 296 + hw->vring[qid].last_avail_idx = num; 297 + ifc_iowrite16(num, avail_idx_addr); 298 + 299 + return 0; 300 + } 301 + 302 + static int ifcvf_hw_enable(struct ifcvf_hw *hw) 303 + { 304 + struct ifcvf_lm_cfg __iomem *ifcvf_lm; 305 + struct virtio_pci_common_cfg __iomem *cfg; 306 + struct ifcvf_adapter *ifcvf; 307 + u32 i; 308 + 309 + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; 310 + ifcvf = vf_to_adapter(hw); 311 + cfg = hw->common_cfg; 312 + ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); 313 + 314 + if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { 315 + IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); 316 + return -EINVAL; 317 + } 318 + 319 + for (i = 0; i < hw->nr_vring; i++) { 320 + if (!hw->vring[i].ready) 321 + break; 322 + 323 + ifc_iowrite16(i, &cfg->queue_select); 324 + ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, 325 + &cfg->queue_desc_hi); 326 + ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, 327 + &cfg->queue_avail_hi); 328 + ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, 329 + &cfg->queue_used_hi); 330 + ifc_iowrite16(hw->vring[i].size, &cfg->queue_size); 331 + ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); 332 + 333 + if (ifc_ioread16(&cfg->queue_msix_vector) == 334 + VIRTIO_MSI_NO_VECTOR) { 335 + IFCVF_ERR(ifcvf->pdev, 336 + "No msix vector for queue %u\n", i); 337 + return -EINVAL; 338 + } 339 + 340 + ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); 341 + ifc_iowrite16(1, &cfg->queue_enable); 342 + } 343 + 344 + return 0; 345 + } 346 + 347 + static void ifcvf_hw_disable(struct ifcvf_hw *hw) 348 + { 349 + struct 
virtio_pci_common_cfg __iomem *cfg; 350 + u32 i; 351 + 352 + cfg = hw->common_cfg; 353 + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); 354 + 355 + for (i = 0; i < hw->nr_vring; i++) { 356 + ifc_iowrite16(i, &cfg->queue_select); 357 + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); 358 + } 359 + 360 + ifc_ioread16(&cfg->queue_msix_vector); 361 + } 362 + 363 + int ifcvf_start_hw(struct ifcvf_hw *hw) 364 + { 365 + ifcvf_reset(hw); 366 + ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE); 367 + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER); 368 + 369 + if (ifcvf_config_features(hw) < 0) 370 + return -EINVAL; 371 + 372 + if (ifcvf_hw_enable(hw) < 0) 373 + return -EINVAL; 374 + 375 + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK); 376 + 377 + return 0; 378 + } 379 + 380 + void ifcvf_stop_hw(struct ifcvf_hw *hw) 381 + { 382 + ifcvf_hw_disable(hw); 383 + ifcvf_reset(hw); 384 + } 385 + 386 + void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) 387 + { 388 + ifc_iowrite16(qid, hw->vring[qid].notify_addr); 389 + }
+118
drivers/vdpa/ifcvf/ifcvf_base.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Intel IFC VF NIC driver for virtio dataplane offloading 4 + * 5 + * Copyright (C) 2020 Intel Corporation. 6 + * 7 + * Author: Zhu Lingshan <lingshan.zhu@intel.com> 8 + * 9 + */ 10 + 11 + #ifndef _IFCVF_H_ 12 + #define _IFCVF_H_ 13 + 14 + #include <linux/pci.h> 15 + #include <linux/pci_regs.h> 16 + #include <linux/vdpa.h> 17 + #include <uapi/linux/virtio_net.h> 18 + #include <uapi/linux/virtio_config.h> 19 + #include <uapi/linux/virtio_pci.h> 20 + 21 + #define IFCVF_VENDOR_ID 0x1AF4 22 + #define IFCVF_DEVICE_ID 0x1041 23 + #define IFCVF_SUBSYS_VENDOR_ID 0x8086 24 + #define IFCVF_SUBSYS_DEVICE_ID 0x001A 25 + 26 + #define IFCVF_SUPPORTED_FEATURES \ 27 + ((1ULL << VIRTIO_NET_F_MAC) | \ 28 + (1ULL << VIRTIO_F_ANY_LAYOUT) | \ 29 + (1ULL << VIRTIO_F_VERSION_1) | \ 30 + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ 31 + (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ 32 + (1ULL << VIRTIO_NET_F_MRG_RXBUF)) 33 + 34 + /* Only one queue pair for now. */ 35 + #define IFCVF_MAX_QUEUE_PAIRS 1 36 + 37 + #define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE 38 + #define IFCVF_QUEUE_MAX 32768 39 + #define IFCVF_MSI_CONFIG_OFF 0 40 + #define IFCVF_MSI_QUEUE_OFF 1 41 + #define IFCVF_PCI_MAX_RESOURCE 6 42 + 43 + #define IFCVF_LM_CFG_SIZE 0x40 44 + #define IFCVF_LM_RING_STATE_OFFSET 0x20 45 + #define IFCVF_LM_BAR 4 46 + 47 + #define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) 48 + #define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__) 49 + #define IFCVF_INFO(pdev, fmt, ...) 
dev_info(&pdev->dev, fmt, ##__VA_ARGS__) 50 + 51 + #define ifcvf_private_to_vf(adapter) \ 52 + (&((struct ifcvf_adapter *)adapter)->vf) 53 + 54 + #define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1) 55 + 56 + struct vring_info { 57 + u64 desc; 58 + u64 avail; 59 + u64 used; 60 + u16 size; 61 + u16 last_avail_idx; 62 + bool ready; 63 + void __iomem *notify_addr; 64 + u32 irq; 65 + struct vdpa_callback cb; 66 + char msix_name[256]; 67 + }; 68 + 69 + struct ifcvf_hw { 70 + u8 __iomem *isr; 71 + /* Live migration */ 72 + u8 __iomem *lm_cfg; 73 + u16 nr_vring; 74 + /* Notification bar number */ 75 + u8 notify_bar; 76 + /* Notificaiton bar address */ 77 + void __iomem *notify_base; 78 + u32 notify_off_multiplier; 79 + u64 req_features; 80 + struct virtio_pci_common_cfg __iomem *common_cfg; 81 + void __iomem *net_cfg; 82 + struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; 83 + void __iomem * const *base; 84 + }; 85 + 86 + struct ifcvf_adapter { 87 + struct vdpa_device vdpa; 88 + struct pci_dev *pdev; 89 + struct ifcvf_hw vf; 90 + }; 91 + 92 + struct ifcvf_vring_lm_cfg { 93 + u32 idx_addr[2]; 94 + u8 reserved[IFCVF_LM_CFG_SIZE - 8]; 95 + }; 96 + 97 + struct ifcvf_lm_cfg { 98 + u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; 99 + struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS]; 100 + }; 101 + 102 + int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); 103 + int ifcvf_start_hw(struct ifcvf_hw *hw); 104 + void ifcvf_stop_hw(struct ifcvf_hw *hw); 105 + void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); 106 + void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, 107 + void *dst, int length); 108 + void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, 109 + const void *src, int length); 110 + u8 ifcvf_get_status(struct ifcvf_hw *hw); 111 + void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); 112 + void io_write64_twopart(u64 val, u32 *lo, u32 *hi); 113 + void ifcvf_reset(struct ifcvf_hw *hw); 114 + u64 ifcvf_get_features(struct ifcvf_hw *hw); 
115 + u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); 116 + int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num); 117 + struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); 118 + #endif /* _IFCVF_H_ */
+435
drivers/vdpa/ifcvf/ifcvf_main.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Intel IFC VF NIC driver for virtio dataplane offloading 4 + * 5 + * Copyright (C) 2020 Intel Corporation. 6 + * 7 + * Author: Zhu Lingshan <lingshan.zhu@intel.com> 8 + * 9 + */ 10 + 11 + #include <linux/interrupt.h> 12 + #include <linux/module.h> 13 + #include <linux/pci.h> 14 + #include <linux/sysfs.h> 15 + #include "ifcvf_base.h" 16 + 17 + #define VERSION_STRING "0.1" 18 + #define DRIVER_AUTHOR "Intel Corporation" 19 + #define IFCVF_DRIVER_NAME "ifcvf" 20 + 21 + static irqreturn_t ifcvf_intr_handler(int irq, void *arg) 22 + { 23 + struct vring_info *vring = arg; 24 + 25 + if (vring->cb.callback) 26 + return vring->cb.callback(vring->cb.private); 27 + 28 + return IRQ_HANDLED; 29 + } 30 + 31 + static int ifcvf_start_datapath(void *private) 32 + { 33 + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); 34 + struct ifcvf_adapter *ifcvf; 35 + u8 status; 36 + int ret; 37 + 38 + ifcvf = vf_to_adapter(vf); 39 + vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2; 40 + ret = ifcvf_start_hw(vf); 41 + if (ret < 0) { 42 + status = ifcvf_get_status(vf); 43 + status |= VIRTIO_CONFIG_S_FAILED; 44 + ifcvf_set_status(vf, status); 45 + } 46 + 47 + return ret; 48 + } 49 + 50 + static int ifcvf_stop_datapath(void *private) 51 + { 52 + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); 53 + int i; 54 + 55 + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) 56 + vf->vring[i].cb.callback = NULL; 57 + 58 + ifcvf_stop_hw(vf); 59 + 60 + return 0; 61 + } 62 + 63 + static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) 64 + { 65 + struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter); 66 + int i; 67 + 68 + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { 69 + vf->vring[i].last_avail_idx = 0; 70 + vf->vring[i].desc = 0; 71 + vf->vring[i].avail = 0; 72 + vf->vring[i].used = 0; 73 + vf->vring[i].ready = 0; 74 + vf->vring[i].cb.callback = NULL; 75 + vf->vring[i].cb.private = NULL; 76 + } 77 + 78 + ifcvf_reset(vf); 79 + } 80 + 81 + static 
struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev) 82 + { 83 + return container_of(vdpa_dev, struct ifcvf_adapter, vdpa); 84 + } 85 + 86 + static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) 87 + { 88 + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); 89 + 90 + return &adapter->vf; 91 + } 92 + 93 + static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) 94 + { 95 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 96 + u64 features; 97 + 98 + features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES; 99 + 100 + return features; 101 + } 102 + 103 + static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) 104 + { 105 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 106 + 107 + vf->req_features = features; 108 + 109 + return 0; 110 + } 111 + 112 + static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) 113 + { 114 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 115 + 116 + return ifcvf_get_status(vf); 117 + } 118 + 119 + static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) 120 + { 121 + struct ifcvf_adapter *adapter; 122 + struct ifcvf_hw *vf; 123 + 124 + vf = vdpa_to_vf(vdpa_dev); 125 + adapter = dev_get_drvdata(vdpa_dev->dev.parent); 126 + 127 + if (status == 0) { 128 + ifcvf_stop_datapath(adapter); 129 + ifcvf_reset_vring(adapter); 130 + return; 131 + } 132 + 133 + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 134 + if (ifcvf_start_datapath(adapter) < 0) 135 + IFCVF_ERR(adapter->pdev, 136 + "Failed to set ifcvf vdpa status %u\n", 137 + status); 138 + } 139 + 140 + ifcvf_set_status(vf, status); 141 + } 142 + 143 + static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) 144 + { 145 + return IFCVF_QUEUE_MAX; 146 + } 147 + 148 + static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid) 149 + { 150 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 151 + 152 + return ifcvf_get_vq_state(vf, qid); 153 + } 154 + 155 + static int ifcvf_vdpa_set_vq_state(struct 
vdpa_device *vdpa_dev, u16 qid, 156 + u64 num) 157 + { 158 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 159 + 160 + return ifcvf_set_vq_state(vf, qid, num); 161 + } 162 + 163 + static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, 164 + struct vdpa_callback *cb) 165 + { 166 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 167 + 168 + vf->vring[qid].cb = *cb; 169 + } 170 + 171 + static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, 172 + u16 qid, bool ready) 173 + { 174 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 175 + 176 + vf->vring[qid].ready = ready; 177 + } 178 + 179 + static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) 180 + { 181 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 182 + 183 + return vf->vring[qid].ready; 184 + } 185 + 186 + static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, 187 + u32 num) 188 + { 189 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 190 + 191 + vf->vring[qid].size = num; 192 + } 193 + 194 + static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, 195 + u64 desc_area, u64 driver_area, 196 + u64 device_area) 197 + { 198 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 199 + 200 + vf->vring[qid].desc = desc_area; 201 + vf->vring[qid].avail = driver_area; 202 + vf->vring[qid].used = device_area; 203 + 204 + return 0; 205 + } 206 + 207 + static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) 208 + { 209 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 210 + 211 + ifcvf_notify_queue(vf, qid); 212 + } 213 + 214 + static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) 215 + { 216 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 217 + 218 + return ioread8(&vf->common_cfg->config_generation); 219 + } 220 + 221 + static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) 222 + { 223 + return VIRTIO_ID_NET; 224 + } 225 + 226 + static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) 227 + { 228 + return 
IFCVF_SUBSYS_VENDOR_ID; 229 + } 230 + 231 + static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) 232 + { 233 + return IFCVF_QUEUE_ALIGNMENT; 234 + } 235 + 236 + static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, 237 + unsigned int offset, 238 + void *buf, unsigned int len) 239 + { 240 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 241 + 242 + WARN_ON(offset + len > sizeof(struct virtio_net_config)); 243 + ifcvf_read_net_config(vf, offset, buf, len); 244 + } 245 + 246 + static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, 247 + unsigned int offset, const void *buf, 248 + unsigned int len) 249 + { 250 + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); 251 + 252 + WARN_ON(offset + len > sizeof(struct virtio_net_config)); 253 + ifcvf_write_net_config(vf, offset, buf, len); 254 + } 255 + 256 + static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, 257 + struct vdpa_callback *cb) 258 + { 259 + /* We don't support config interrupt */ 260 + } 261 + 262 + /* 263 + * IFCVF currently does't have on-chip IOMMU, so not 264 + * implemented set_map()/dma_map()/dma_unmap() 265 + */ 266 + static const struct vdpa_config_ops ifc_vdpa_ops = { 267 + .get_features = ifcvf_vdpa_get_features, 268 + .set_features = ifcvf_vdpa_set_features, 269 + .get_status = ifcvf_vdpa_get_status, 270 + .set_status = ifcvf_vdpa_set_status, 271 + .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, 272 + .get_vq_state = ifcvf_vdpa_get_vq_state, 273 + .set_vq_state = ifcvf_vdpa_set_vq_state, 274 + .set_vq_cb = ifcvf_vdpa_set_vq_cb, 275 + .set_vq_ready = ifcvf_vdpa_set_vq_ready, 276 + .get_vq_ready = ifcvf_vdpa_get_vq_ready, 277 + .set_vq_num = ifcvf_vdpa_set_vq_num, 278 + .set_vq_address = ifcvf_vdpa_set_vq_address, 279 + .kick_vq = ifcvf_vdpa_kick_vq, 280 + .get_generation = ifcvf_vdpa_get_generation, 281 + .get_device_id = ifcvf_vdpa_get_device_id, 282 + .get_vendor_id = ifcvf_vdpa_get_vendor_id, 283 + .get_vq_align = ifcvf_vdpa_get_vq_align, 284 + .get_config = 
ifcvf_vdpa_get_config, 285 + .set_config = ifcvf_vdpa_set_config, 286 + .set_config_cb = ifcvf_vdpa_set_config_cb, 287 + }; 288 + 289 + static int ifcvf_request_irq(struct ifcvf_adapter *adapter) 290 + { 291 + struct pci_dev *pdev = adapter->pdev; 292 + struct ifcvf_hw *vf = &adapter->vf; 293 + int vector, i, ret, irq; 294 + 295 + 296 + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { 297 + snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", 298 + pci_name(pdev), i); 299 + vector = i + IFCVF_MSI_QUEUE_OFF; 300 + irq = pci_irq_vector(pdev, vector); 301 + ret = devm_request_irq(&pdev->dev, irq, 302 + ifcvf_intr_handler, 0, 303 + vf->vring[i].msix_name, 304 + &vf->vring[i]); 305 + if (ret) { 306 + IFCVF_ERR(pdev, 307 + "Failed to request irq for vq %d\n", i); 308 + return ret; 309 + } 310 + vf->vring[i].irq = irq; 311 + } 312 + 313 + return 0; 314 + } 315 + 316 + static void ifcvf_free_irq_vectors(void *data) 317 + { 318 + pci_free_irq_vectors(data); 319 + } 320 + 321 + static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) 322 + { 323 + struct device *dev = &pdev->dev; 324 + struct ifcvf_adapter *adapter; 325 + struct ifcvf_hw *vf; 326 + int ret; 327 + 328 + ret = pcim_enable_device(pdev); 329 + if (ret) { 330 + IFCVF_ERR(pdev, "Failed to enable device\n"); 331 + return ret; 332 + } 333 + 334 + ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), 335 + IFCVF_DRIVER_NAME); 336 + if (ret) { 337 + IFCVF_ERR(pdev, "Failed to request MMIO region\n"); 338 + return ret; 339 + } 340 + 341 + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 342 + if (ret) { 343 + IFCVF_ERR(pdev, "No usable DMA confiugration\n"); 344 + return ret; 345 + } 346 + 347 + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 348 + if (ret) { 349 + IFCVF_ERR(pdev, 350 + "No usable coherent DMA confiugration\n"); 351 + return ret; 352 + } 353 + 354 + ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, 355 + IFCVF_MAX_INTR, PCI_IRQ_MSIX); 356 + if (ret < 0) { 357 + 
IFCVF_ERR(pdev, "Failed to alloc irq vectors\n"); 358 + return ret; 359 + } 360 + 361 + ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); 362 + if (ret) { 363 + IFCVF_ERR(pdev, 364 + "Failed for adding devres for freeing irq vectors\n"); 365 + return ret; 366 + } 367 + 368 + adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, 369 + dev, &ifc_vdpa_ops); 370 + if (adapter == NULL) { 371 + IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); 372 + return -ENOMEM; 373 + } 374 + 375 + pci_set_master(pdev); 376 + pci_set_drvdata(pdev, adapter); 377 + 378 + vf = &adapter->vf; 379 + vf->base = pcim_iomap_table(pdev); 380 + 381 + adapter->pdev = pdev; 382 + adapter->vdpa.dma_dev = &pdev->dev; 383 + 384 + ret = ifcvf_request_irq(adapter); 385 + if (ret) { 386 + IFCVF_ERR(pdev, "Failed to request MSI-X irq\n"); 387 + goto err; 388 + } 389 + 390 + ret = ifcvf_init_hw(vf, pdev); 391 + if (ret) { 392 + IFCVF_ERR(pdev, "Failed to init IFCVF hw\n"); 393 + goto err; 394 + } 395 + 396 + ret = vdpa_register_device(&adapter->vdpa); 397 + if (ret) { 398 + IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); 399 + goto err; 400 + } 401 + 402 + return 0; 403 + 404 + err: 405 + put_device(&adapter->vdpa.dev); 406 + return ret; 407 + } 408 + 409 + static void ifcvf_remove(struct pci_dev *pdev) 410 + { 411 + struct ifcvf_adapter *adapter = pci_get_drvdata(pdev); 412 + 413 + vdpa_unregister_device(&adapter->vdpa); 414 + } 415 + 416 + static struct pci_device_id ifcvf_pci_ids[] = { 417 + { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, 418 + IFCVF_DEVICE_ID, 419 + IFCVF_SUBSYS_VENDOR_ID, 420 + IFCVF_SUBSYS_DEVICE_ID) }, 421 + { 0 }, 422 + }; 423 + MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids); 424 + 425 + static struct pci_driver ifcvf_driver = { 426 + .name = IFCVF_DRIVER_NAME, 427 + .id_table = ifcvf_pci_ids, 428 + .probe = ifcvf_probe, 429 + .remove = ifcvf_remove, 430 + }; 431 + 432 + module_pci_driver(ifcvf_driver); 433 + 434 + MODULE_LICENSE("GPL v2"); 435 + 
MODULE_VERSION(VERSION_STRING);
+180
drivers/vdpa/vdpa.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * vDPA bus. 4 + * 5 + * Copyright (c) 2020, Red Hat. All rights reserved. 6 + * Author: Jason Wang <jasowang@redhat.com> 7 + * 8 + */ 9 + 10 + #include <linux/module.h> 11 + #include <linux/idr.h> 12 + #include <linux/slab.h> 13 + #include <linux/vdpa.h> 14 + 15 + static DEFINE_IDA(vdpa_index_ida); 16 + 17 + static int vdpa_dev_probe(struct device *d) 18 + { 19 + struct vdpa_device *vdev = dev_to_vdpa(d); 20 + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); 21 + int ret = 0; 22 + 23 + if (drv && drv->probe) 24 + ret = drv->probe(vdev); 25 + 26 + return ret; 27 + } 28 + 29 + static int vdpa_dev_remove(struct device *d) 30 + { 31 + struct vdpa_device *vdev = dev_to_vdpa(d); 32 + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); 33 + 34 + if (drv && drv->remove) 35 + drv->remove(vdev); 36 + 37 + return 0; 38 + } 39 + 40 + static struct bus_type vdpa_bus = { 41 + .name = "vdpa", 42 + .probe = vdpa_dev_probe, 43 + .remove = vdpa_dev_remove, 44 + }; 45 + 46 + static void vdpa_release_dev(struct device *d) 47 + { 48 + struct vdpa_device *vdev = dev_to_vdpa(d); 49 + const struct vdpa_config_ops *ops = vdev->config; 50 + 51 + if (ops->free) 52 + ops->free(vdev); 53 + 54 + ida_simple_remove(&vdpa_index_ida, vdev->index); 55 + kfree(vdev); 56 + } 57 + 58 + /** 59 + * __vdpa_alloc_device - allocate and initilaize a vDPA device 60 + * This allows driver to some prepartion after device is 61 + * initialized but before registered. 62 + * @parent: the parent device 63 + * @config: the bus operations that is supported by this device 64 + * @size: size of the parent structure that contains private data 65 + * 66 + * Drvier should use vdap_alloc_device() wrapper macro instead of 67 + * using this directly. 68 + * 69 + * Returns an error when parent/config/dma_dev is not set or fail to get 70 + * ida. 
71 + */ 72 + struct vdpa_device *__vdpa_alloc_device(struct device *parent, 73 + const struct vdpa_config_ops *config, 74 + size_t size) 75 + { 76 + struct vdpa_device *vdev; 77 + int err = -EINVAL; 78 + 79 + if (!config) 80 + goto err; 81 + 82 + if (!!config->dma_map != !!config->dma_unmap) 83 + goto err; 84 + 85 + err = -ENOMEM; 86 + vdev = kzalloc(size, GFP_KERNEL); 87 + if (!vdev) 88 + goto err; 89 + 90 + err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); 91 + if (err < 0) 92 + goto err_ida; 93 + 94 + vdev->dev.bus = &vdpa_bus; 95 + vdev->dev.parent = parent; 96 + vdev->dev.release = vdpa_release_dev; 97 + vdev->index = err; 98 + vdev->config = config; 99 + 100 + err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); 101 + if (err) 102 + goto err_name; 103 + 104 + device_initialize(&vdev->dev); 105 + 106 + return vdev; 107 + 108 + err_name: 109 + ida_simple_remove(&vdpa_index_ida, vdev->index); 110 + err_ida: 111 + kfree(vdev); 112 + err: 113 + return ERR_PTR(err); 114 + } 115 + EXPORT_SYMBOL_GPL(__vdpa_alloc_device); 116 + 117 + /** 118 + * vdpa_register_device - register a vDPA device 119 + * Callers must have a succeed call of vdpa_init_device() before. 
120 + * @vdev: the vdpa device to be registered to vDPA bus 121 + * 122 + * Returns an error when fail to add to vDPA bus 123 + */ 124 + int vdpa_register_device(struct vdpa_device *vdev) 125 + { 126 + return device_add(&vdev->dev); 127 + } 128 + EXPORT_SYMBOL_GPL(vdpa_register_device); 129 + 130 + /** 131 + * vdpa_unregister_device - unregister a vDPA device 132 + * @vdev: the vdpa device to be unregisted from vDPA bus 133 + */ 134 + void vdpa_unregister_device(struct vdpa_device *vdev) 135 + { 136 + device_unregister(&vdev->dev); 137 + } 138 + EXPORT_SYMBOL_GPL(vdpa_unregister_device); 139 + 140 + /** 141 + * __vdpa_register_driver - register a vDPA device driver 142 + * @drv: the vdpa device driver to be registered 143 + * @owner: module owner of the driver 144 + * 145 + * Returns an err when fail to do the registration 146 + */ 147 + int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) 148 + { 149 + drv->driver.bus = &vdpa_bus; 150 + drv->driver.owner = owner; 151 + 152 + return driver_register(&drv->driver); 153 + } 154 + EXPORT_SYMBOL_GPL(__vdpa_register_driver); 155 + 156 + /** 157 + * vdpa_unregister_driver - unregister a vDPA device driver 158 + * @drv: the vdpa device driver to be unregistered 159 + */ 160 + void vdpa_unregister_driver(struct vdpa_driver *drv) 161 + { 162 + driver_unregister(&drv->driver); 163 + } 164 + EXPORT_SYMBOL_GPL(vdpa_unregister_driver); 165 + 166 + static int vdpa_init(void) 167 + { 168 + return bus_register(&vdpa_bus); 169 + } 170 + 171 + static void __exit vdpa_exit(void) 172 + { 173 + bus_unregister(&vdpa_bus); 174 + ida_destroy(&vdpa_index_ida); 175 + } 176 + core_initcall(vdpa_init); 177 + module_exit(vdpa_exit); 178 + 179 + MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); 180 + MODULE_LICENSE("GPL v2");
+2
drivers/vdpa/vdpa_sim/Makefile
# SPDX-License-Identifier: GPL-2.0
# Build the vDPA device simulator when CONFIG_VDPA_SIM is enabled.
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
+629
drivers/vdpa/vdpa_sim/vdpa_sim.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * VDPA networking device simulator. 4 + * 5 + * Copyright (c) 2020, Red Hat Inc. All rights reserved. 6 + * Author: Jason Wang <jasowang@redhat.com> 7 + * 8 + */ 9 + 10 + #include <linux/init.h> 11 + #include <linux/module.h> 12 + #include <linux/device.h> 13 + #include <linux/kernel.h> 14 + #include <linux/fs.h> 15 + #include <linux/poll.h> 16 + #include <linux/slab.h> 17 + #include <linux/sched.h> 18 + #include <linux/wait.h> 19 + #include <linux/uuid.h> 20 + #include <linux/iommu.h> 21 + #include <linux/dma-mapping.h> 22 + #include <linux/sysfs.h> 23 + #include <linux/file.h> 24 + #include <linux/etherdevice.h> 25 + #include <linux/vringh.h> 26 + #include <linux/vdpa.h> 27 + #include <linux/vhost_iotlb.h> 28 + #include <uapi/linux/virtio_config.h> 29 + #include <uapi/linux/virtio_net.h> 30 + 31 + #define DRV_VERSION "0.1" 32 + #define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>" 33 + #define DRV_DESC "vDPA Device Simulator" 34 + #define DRV_LICENSE "GPL v2" 35 + 36 + struct vdpasim_virtqueue { 37 + struct vringh vring; 38 + struct vringh_kiov iov; 39 + unsigned short head; 40 + bool ready; 41 + u64 desc_addr; 42 + u64 device_addr; 43 + u64 driver_addr; 44 + u32 num; 45 + void *private; 46 + irqreturn_t (*cb)(void *data); 47 + }; 48 + 49 + #define VDPASIM_QUEUE_ALIGN PAGE_SIZE 50 + #define VDPASIM_QUEUE_MAX 256 51 + #define VDPASIM_DEVICE_ID 0x1 52 + #define VDPASIM_VENDOR_ID 0 53 + #define VDPASIM_VQ_NUM 0x2 54 + #define VDPASIM_NAME "vdpasim-netdev" 55 + 56 + static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | 57 + (1ULL << VIRTIO_F_VERSION_1) | 58 + (1ULL << VIRTIO_F_IOMMU_PLATFORM); 59 + 60 + /* State of each vdpasim device */ 61 + struct vdpasim { 62 + struct vdpa_device vdpa; 63 + struct vdpasim_virtqueue vqs[2]; 64 + struct work_struct work; 65 + /* spinlock to synchronize virtqueue state */ 66 + spinlock_t lock; 67 + struct virtio_net_config config; 68 + struct vhost_iotlb *iommu; 69 + 
void *buffer; 70 + u32 status; 71 + u32 generation; 72 + u64 features; 73 + }; 74 + 75 + static struct vdpasim *vdpasim_dev; 76 + 77 + static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) 78 + { 79 + return container_of(vdpa, struct vdpasim, vdpa); 80 + } 81 + 82 + static struct vdpasim *dev_to_sim(struct device *dev) 83 + { 84 + struct vdpa_device *vdpa = dev_to_vdpa(dev); 85 + 86 + return vdpa_to_sim(vdpa); 87 + } 88 + 89 + static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) 90 + { 91 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 92 + int ret; 93 + 94 + ret = vringh_init_iotlb(&vq->vring, vdpasim_features, 95 + VDPASIM_QUEUE_MAX, false, 96 + (struct vring_desc *)(uintptr_t)vq->desc_addr, 97 + (struct vring_avail *) 98 + (uintptr_t)vq->driver_addr, 99 + (struct vring_used *) 100 + (uintptr_t)vq->device_addr); 101 + } 102 + 103 + static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) 104 + { 105 + vq->ready = 0; 106 + vq->desc_addr = 0; 107 + vq->driver_addr = 0; 108 + vq->device_addr = 0; 109 + vq->cb = NULL; 110 + vq->private = NULL; 111 + vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, 112 + false, NULL, NULL, NULL); 113 + } 114 + 115 + static void vdpasim_reset(struct vdpasim *vdpasim) 116 + { 117 + int i; 118 + 119 + for (i = 0; i < VDPASIM_VQ_NUM; i++) 120 + vdpasim_vq_reset(&vdpasim->vqs[i]); 121 + 122 + vhost_iotlb_reset(vdpasim->iommu); 123 + 124 + vdpasim->features = 0; 125 + vdpasim->status = 0; 126 + ++vdpasim->generation; 127 + } 128 + 129 + static void vdpasim_work(struct work_struct *work) 130 + { 131 + struct vdpasim *vdpasim = container_of(work, struct 132 + vdpasim, work); 133 + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; 134 + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; 135 + size_t read, write, total_write; 136 + int err; 137 + int pkts = 0; 138 + 139 + spin_lock(&vdpasim->lock); 140 + 141 + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) 142 + goto out; 143 + 144 + if 
(!txq->ready || !rxq->ready) 145 + goto out; 146 + 147 + while (true) { 148 + total_write = 0; 149 + err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, 150 + &txq->head, GFP_ATOMIC); 151 + if (err <= 0) 152 + break; 153 + 154 + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, 155 + &rxq->head, GFP_ATOMIC); 156 + if (err <= 0) { 157 + vringh_complete_iotlb(&txq->vring, txq->head, 0); 158 + break; 159 + } 160 + 161 + while (true) { 162 + read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, 163 + vdpasim->buffer, 164 + PAGE_SIZE); 165 + if (read <= 0) 166 + break; 167 + 168 + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, 169 + vdpasim->buffer, read); 170 + if (write <= 0) 171 + break; 172 + 173 + total_write += write; 174 + } 175 + 176 + /* Make sure data is wrote before advancing index */ 177 + smp_wmb(); 178 + 179 + vringh_complete_iotlb(&txq->vring, txq->head, 0); 180 + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); 181 + 182 + /* Make sure used is visible before rasing the interrupt. 
*/ 183 + smp_wmb(); 184 + 185 + local_bh_disable(); 186 + if (txq->cb) 187 + txq->cb(txq->private); 188 + if (rxq->cb) 189 + rxq->cb(rxq->private); 190 + local_bh_enable(); 191 + 192 + if (++pkts > 4) { 193 + schedule_work(&vdpasim->work); 194 + goto out; 195 + } 196 + } 197 + 198 + out: 199 + spin_unlock(&vdpasim->lock); 200 + } 201 + 202 + static int dir_to_perm(enum dma_data_direction dir) 203 + { 204 + int perm = -EFAULT; 205 + 206 + switch (dir) { 207 + case DMA_FROM_DEVICE: 208 + perm = VHOST_MAP_WO; 209 + break; 210 + case DMA_TO_DEVICE: 211 + perm = VHOST_MAP_RO; 212 + break; 213 + case DMA_BIDIRECTIONAL: 214 + perm = VHOST_MAP_RW; 215 + break; 216 + default: 217 + break; 218 + } 219 + 220 + return perm; 221 + } 222 + 223 + static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, 224 + unsigned long offset, size_t size, 225 + enum dma_data_direction dir, 226 + unsigned long attrs) 227 + { 228 + struct vdpasim *vdpasim = dev_to_sim(dev); 229 + struct vhost_iotlb *iommu = vdpasim->iommu; 230 + u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; 231 + int ret, perm = dir_to_perm(dir); 232 + 233 + if (perm < 0) 234 + return DMA_MAPPING_ERROR; 235 + 236 + /* For simplicity, use identical mapping to avoid e.g iova 237 + * allocator. 
238 + */ 239 + ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, 240 + pa, dir_to_perm(dir)); 241 + if (ret) 242 + return DMA_MAPPING_ERROR; 243 + 244 + return (dma_addr_t)(pa); 245 + } 246 + 247 + static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, 248 + size_t size, enum dma_data_direction dir, 249 + unsigned long attrs) 250 + { 251 + struct vdpasim *vdpasim = dev_to_sim(dev); 252 + struct vhost_iotlb *iommu = vdpasim->iommu; 253 + 254 + vhost_iotlb_del_range(iommu, (u64)dma_addr, 255 + (u64)dma_addr + size - 1); 256 + } 257 + 258 + static void *vdpasim_alloc_coherent(struct device *dev, size_t size, 259 + dma_addr_t *dma_addr, gfp_t flag, 260 + unsigned long attrs) 261 + { 262 + struct vdpasim *vdpasim = dev_to_sim(dev); 263 + struct vhost_iotlb *iommu = vdpasim->iommu; 264 + void *addr = kmalloc(size, flag); 265 + int ret; 266 + 267 + if (!addr) 268 + *dma_addr = DMA_MAPPING_ERROR; 269 + else { 270 + u64 pa = virt_to_phys(addr); 271 + 272 + ret = vhost_iotlb_add_range(iommu, (u64)pa, 273 + (u64)pa + size - 1, 274 + pa, VHOST_MAP_RW); 275 + if (ret) { 276 + *dma_addr = DMA_MAPPING_ERROR; 277 + kfree(addr); 278 + addr = NULL; 279 + } else 280 + *dma_addr = (dma_addr_t)pa; 281 + } 282 + 283 + return addr; 284 + } 285 + 286 + static void vdpasim_free_coherent(struct device *dev, size_t size, 287 + void *vaddr, dma_addr_t dma_addr, 288 + unsigned long attrs) 289 + { 290 + struct vdpasim *vdpasim = dev_to_sim(dev); 291 + struct vhost_iotlb *iommu = vdpasim->iommu; 292 + 293 + vhost_iotlb_del_range(iommu, (u64)dma_addr, 294 + (u64)dma_addr + size - 1); 295 + kfree(phys_to_virt((uintptr_t)dma_addr)); 296 + } 297 + 298 + static const struct dma_map_ops vdpasim_dma_ops = { 299 + .map_page = vdpasim_map_page, 300 + .unmap_page = vdpasim_unmap_page, 301 + .alloc = vdpasim_alloc_coherent, 302 + .free = vdpasim_free_coherent, 303 + }; 304 + 305 + static const struct vdpa_config_ops vdpasim_net_config_ops; 306 + 307 + static struct vdpasim 
*vdpasim_create(void) 308 + { 309 + struct virtio_net_config *config; 310 + struct vdpasim *vdpasim; 311 + struct device *dev; 312 + int ret = -ENOMEM; 313 + 314 + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, 315 + &vdpasim_net_config_ops); 316 + if (!vdpasim) 317 + goto err_alloc; 318 + 319 + INIT_WORK(&vdpasim->work, vdpasim_work); 320 + spin_lock_init(&vdpasim->lock); 321 + 322 + dev = &vdpasim->vdpa.dev; 323 + dev->coherent_dma_mask = DMA_BIT_MASK(64); 324 + set_dma_ops(dev, &vdpasim_dma_ops); 325 + 326 + vdpasim->iommu = vhost_iotlb_alloc(2048, 0); 327 + if (!vdpasim->iommu) 328 + goto err_iommu; 329 + 330 + vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); 331 + if (!vdpasim->buffer) 332 + goto err_iommu; 333 + 334 + config = &vdpasim->config; 335 + config->mtu = 1500; 336 + config->status = VIRTIO_NET_S_LINK_UP; 337 + eth_random_addr(config->mac); 338 + 339 + vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); 340 + vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); 341 + 342 + vdpasim->vdpa.dma_dev = dev; 343 + ret = vdpa_register_device(&vdpasim->vdpa); 344 + if (ret) 345 + goto err_iommu; 346 + 347 + return vdpasim; 348 + 349 + err_iommu: 350 + put_device(dev); 351 + err_alloc: 352 + return ERR_PTR(ret); 353 + } 354 + 355 + static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, 356 + u64 desc_area, u64 driver_area, 357 + u64 device_area) 358 + { 359 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 360 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 361 + 362 + vq->desc_addr = desc_area; 363 + vq->driver_addr = driver_area; 364 + vq->device_addr = device_area; 365 + 366 + return 0; 367 + } 368 + 369 + static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) 370 + { 371 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 372 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 373 + 374 + vq->num = num; 375 + } 376 + 377 + static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) 378 + { 379 + 
struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 380 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 381 + 382 + if (vq->ready) 383 + schedule_work(&vdpasim->work); 384 + } 385 + 386 + static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, 387 + struct vdpa_callback *cb) 388 + { 389 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 390 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 391 + 392 + vq->cb = cb->callback; 393 + vq->private = cb->private; 394 + } 395 + 396 + static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) 397 + { 398 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 399 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 400 + 401 + spin_lock(&vdpasim->lock); 402 + vq->ready = ready; 403 + if (vq->ready) 404 + vdpasim_queue_ready(vdpasim, idx); 405 + spin_unlock(&vdpasim->lock); 406 + } 407 + 408 + static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) 409 + { 410 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 411 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 412 + 413 + return vq->ready; 414 + } 415 + 416 + static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state) 417 + { 418 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 419 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 420 + struct vringh *vrh = &vq->vring; 421 + 422 + spin_lock(&vdpasim->lock); 423 + vrh->last_avail_idx = state; 424 + spin_unlock(&vdpasim->lock); 425 + 426 + return 0; 427 + } 428 + 429 + static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) 430 + { 431 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 432 + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 433 + struct vringh *vrh = &vq->vring; 434 + 435 + return vrh->last_avail_idx; 436 + } 437 + 438 + static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa) 439 + { 440 + return VDPASIM_QUEUE_ALIGN; 441 + } 442 + 443 + static u64 vdpasim_get_features(struct vdpa_device *vdpa) 444 + { 445 + return vdpasim_features; 446 + } 447 + 448 
+ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) 449 + { 450 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 451 + 452 + /* DMA mapping must be done by driver */ 453 + if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) 454 + return -EINVAL; 455 + 456 + vdpasim->features = features & vdpasim_features; 457 + 458 + return 0; 459 + } 460 + 461 + static void vdpasim_set_config_cb(struct vdpa_device *vdpa, 462 + struct vdpa_callback *cb) 463 + { 464 + /* We don't support config interrupt */ 465 + } 466 + 467 + static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) 468 + { 469 + return VDPASIM_QUEUE_MAX; 470 + } 471 + 472 + static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) 473 + { 474 + return VDPASIM_DEVICE_ID; 475 + } 476 + 477 + static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) 478 + { 479 + return VDPASIM_VENDOR_ID; 480 + } 481 + 482 + static u8 vdpasim_get_status(struct vdpa_device *vdpa) 483 + { 484 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 485 + u8 status; 486 + 487 + spin_lock(&vdpasim->lock); 488 + status = vdpasim->status; 489 + spin_unlock(&vdpasim->lock); 490 + 491 + return vdpasim->status; 492 + } 493 + 494 + static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) 495 + { 496 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 497 + 498 + spin_lock(&vdpasim->lock); 499 + vdpasim->status = status; 500 + if (status == 0) 501 + vdpasim_reset(vdpasim); 502 + spin_unlock(&vdpasim->lock); 503 + } 504 + 505 + static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, 506 + void *buf, unsigned int len) 507 + { 508 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 509 + 510 + if (offset + len < sizeof(struct virtio_net_config)) 511 + memcpy(buf, &vdpasim->config + offset, len); 512 + } 513 + 514 + static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, 515 + const void *buf, unsigned int len) 516 + { 517 + /* No writable config supportted by vdpasim */ 518 + } 519 + 520 
+ static u32 vdpasim_get_generation(struct vdpa_device *vdpa) 521 + { 522 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 523 + 524 + return vdpasim->generation; 525 + } 526 + 527 + static int vdpasim_set_map(struct vdpa_device *vdpa, 528 + struct vhost_iotlb *iotlb) 529 + { 530 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 531 + struct vhost_iotlb_map *map; 532 + u64 start = 0ULL, last = 0ULL - 1; 533 + int ret; 534 + 535 + vhost_iotlb_reset(vdpasim->iommu); 536 + 537 + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; 538 + map = vhost_iotlb_itree_next(map, start, last)) { 539 + ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, 540 + map->last, map->addr, map->perm); 541 + if (ret) 542 + goto err; 543 + } 544 + return 0; 545 + 546 + err: 547 + vhost_iotlb_reset(vdpasim->iommu); 548 + return ret; 549 + } 550 + 551 + static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, 552 + u64 pa, u32 perm) 553 + { 554 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 555 + 556 + return vhost_iotlb_add_range(vdpasim->iommu, iova, 557 + iova + size - 1, pa, perm); 558 + } 559 + 560 + static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) 561 + { 562 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 563 + 564 + vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); 565 + 566 + return 0; 567 + } 568 + 569 + static void vdpasim_free(struct vdpa_device *vdpa) 570 + { 571 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 572 + 573 + cancel_work_sync(&vdpasim->work); 574 + kfree(vdpasim->buffer); 575 + if (vdpasim->iommu) 576 + vhost_iotlb_free(vdpasim->iommu); 577 + } 578 + 579 + static const struct vdpa_config_ops vdpasim_net_config_ops = { 580 + .set_vq_address = vdpasim_set_vq_address, 581 + .set_vq_num = vdpasim_set_vq_num, 582 + .kick_vq = vdpasim_kick_vq, 583 + .set_vq_cb = vdpasim_set_vq_cb, 584 + .set_vq_ready = vdpasim_set_vq_ready, 585 + .get_vq_ready = vdpasim_get_vq_ready, 586 + .set_vq_state = vdpasim_set_vq_state, 587 
+ .get_vq_state = vdpasim_get_vq_state, 588 + .get_vq_align = vdpasim_get_vq_align, 589 + .get_features = vdpasim_get_features, 590 + .set_features = vdpasim_set_features, 591 + .set_config_cb = vdpasim_set_config_cb, 592 + .get_vq_num_max = vdpasim_get_vq_num_max, 593 + .get_device_id = vdpasim_get_device_id, 594 + .get_vendor_id = vdpasim_get_vendor_id, 595 + .get_status = vdpasim_get_status, 596 + .set_status = vdpasim_set_status, 597 + .get_config = vdpasim_get_config, 598 + .set_config = vdpasim_set_config, 599 + .get_generation = vdpasim_get_generation, 600 + .set_map = vdpasim_set_map, 601 + .dma_map = vdpasim_dma_map, 602 + .dma_unmap = vdpasim_dma_unmap, 603 + .free = vdpasim_free, 604 + }; 605 + 606 + static int __init vdpasim_dev_init(void) 607 + { 608 + vdpasim_dev = vdpasim_create(); 609 + 610 + if (!IS_ERR(vdpasim_dev)) 611 + return 0; 612 + 613 + return PTR_ERR(vdpasim_dev); 614 + } 615 + 616 + static void __exit vdpasim_dev_exit(void) 617 + { 618 + struct vdpa_device *vdpa = &vdpasim_dev->vdpa; 619 + 620 + vdpa_unregister_device(vdpa); 621 + } 622 + 623 + module_init(vdpasim_dev_init) 624 + module_exit(vdpasim_dev_exit) 625 + 626 + MODULE_VERSION(DRV_VERSION); 627 + MODULE_LICENSE(DRV_LICENSE); 628 + MODULE_AUTHOR(DRV_AUTHOR); 629 + MODULE_DESCRIPTION(DRV_DESC);
+39 -6
drivers/vhost/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 + config VHOST_IOTLB 3 + tristate 4 + help 5 + Generic IOTLB implementation for vhost and vringh. 6 + 7 + config VHOST_RING 8 + tristate 9 + select VHOST_IOTLB 10 + help 11 + This option is selected by any driver which needs to access 12 + the host side of a virtio ring. 13 + 14 + config VHOST 15 + tristate 16 + select VHOST_IOTLB 17 + help 18 + This option is selected by any driver which needs to access 19 + the core of vhost. 20 + 21 + menuconfig VHOST_MENU 22 + bool "VHOST drivers" 23 + default y 24 + 25 + if VHOST_MENU 26 + 2 27 config VHOST_NET 3 28 tristate "Host kernel accelerator for virtio net" 4 29 depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) ··· 48 23 config VHOST_VSOCK 49 24 tristate "vhost virtio-vsock driver" 50 25 depends on VSOCKETS && EVENTFD 51 - select VIRTIO_VSOCKETS_COMMON 52 26 select VHOST 27 + select VIRTIO_VSOCKETS_COMMON 53 28 default n 54 29 ---help--- 55 30 This kernel module can be loaded in the host kernel to provide AF_VSOCK ··· 59 34 To compile this driver as a module, choose M here: the module will be called 60 35 vhost_vsock. 61 36 62 - config VHOST 63 - tristate 64 - ---help--- 65 - This option is selected by any driver which needs to access 66 - the core of vhost. 37 + config VHOST_VDPA 38 + tristate "Vhost driver for vDPA-based backend" 39 + depends on EVENTFD 40 + select VHOST 41 + select VDPA 42 + help 43 + This kernel module can be loaded in host kernel to accelerate 44 + guest virtio devices with the vDPA-based backends. 45 + 46 + To compile this driver as a module, choose M here: the module 47 + will be called vhost_vdpa. 67 48 68 49 config VHOST_CROSS_ENDIAN_LEGACY 69 50 bool "Cross-endian support for vhost" ··· 85 54 adds some overhead, it is disabled by default. 86 55 87 56 If unsure, say "N". 57 + 58 + endif
-6
drivers/vhost/Kconfig.vringh
··· 1 - # SPDX-License-Identifier: GPL-2.0-only 2 - config VHOST_RING 3 - tristate 4 - ---help--- 5 - This option is selected by any driver which needs to access 6 - the host side of a virtio ring.
+6
drivers/vhost/Makefile
··· 10 10 11 11 obj-$(CONFIG_VHOST_RING) += vringh.o 12 12 13 + obj-$(CONFIG_VHOST_VDPA) += vhost_vdpa.o 14 + vhost_vdpa-y := vdpa.o 15 + 13 16 obj-$(CONFIG_VHOST) += vhost.o 17 + 18 + obj-$(CONFIG_VHOST_IOTLB) += vhost_iotlb.o 19 + vhost_iotlb-y := iotlb.o
+177
drivers/vhost/iotlb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (C) 2020 Red Hat, Inc. 3 + * Author: Jason Wang <jasowang@redhat.com> 4 + * 5 + * IOTLB implementation for vhost. 6 + */ 7 + #include <linux/slab.h> 8 + #include <linux/vhost_iotlb.h> 9 + #include <linux/module.h> 10 + 11 + #define MOD_VERSION "0.1" 12 + #define MOD_DESC "VHOST IOTLB" 13 + #define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>" 14 + #define MOD_LICENSE "GPL v2" 15 + 16 + #define START(map) ((map)->start) 17 + #define LAST(map) ((map)->last) 18 + 19 + INTERVAL_TREE_DEFINE(struct vhost_iotlb_map, 20 + rb, __u64, __subtree_last, 21 + START, LAST, static inline, vhost_iotlb_itree); 22 + 23 + /** 24 + * vhost_iotlb_map_free - remove a map node and free it 25 + * @iotlb: the IOTLB 26 + * @map: the map that want to be remove and freed 27 + */ 28 + void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, 29 + struct vhost_iotlb_map *map) 30 + { 31 + vhost_iotlb_itree_remove(map, &iotlb->root); 32 + list_del(&map->link); 33 + kfree(map); 34 + iotlb->nmaps--; 35 + } 36 + EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); 37 + 38 + /** 39 + * vhost_iotlb_add_range - add a new range to vhost IOTLB 40 + * @iotlb: the IOTLB 41 + * @start: start of the IOVA range 42 + * @last: last of IOVA range 43 + * @addr: the address that is mapped to @start 44 + * @perm: access permission of this range 45 + * 46 + * Returns an error last is smaller than start or memory allocation 47 + * fails 48 + */ 49 + int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, 50 + u64 start, u64 last, 51 + u64 addr, unsigned int perm) 52 + { 53 + struct vhost_iotlb_map *map; 54 + 55 + if (last < start) 56 + return -EFAULT; 57 + 58 + if (iotlb->limit && 59 + iotlb->nmaps == iotlb->limit && 60 + iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) { 61 + map = list_first_entry(&iotlb->list, typeof(*map), link); 62 + vhost_iotlb_map_free(iotlb, map); 63 + } 64 + 65 + map = kmalloc(sizeof(*map), GFP_ATOMIC); 66 + if (!map) 67 + return -ENOMEM; 68 + 69 + 
map->start = start; 70 + map->size = last - start + 1; 71 + map->last = last; 72 + map->addr = addr; 73 + map->perm = perm; 74 + 75 + iotlb->nmaps++; 76 + vhost_iotlb_itree_insert(map, &iotlb->root); 77 + 78 + INIT_LIST_HEAD(&map->link); 79 + list_add_tail(&map->link, &iotlb->list); 80 + 81 + return 0; 82 + } 83 + EXPORT_SYMBOL_GPL(vhost_iotlb_add_range); 84 + 85 + /** 86 + * vring_iotlb_del_range - delete overlapped ranges from vhost IOTLB 87 + * @iotlb: the IOTLB 88 + * @start: start of the IOVA range 89 + * @last: last of IOVA range 90 + */ 91 + void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last) 92 + { 93 + struct vhost_iotlb_map *map; 94 + 95 + while ((map = vhost_iotlb_itree_iter_first(&iotlb->root, 96 + start, last))) 97 + vhost_iotlb_map_free(iotlb, map); 98 + } 99 + EXPORT_SYMBOL_GPL(vhost_iotlb_del_range); 100 + 101 + /** 102 + * vhost_iotlb_alloc - add a new vhost IOTLB 103 + * @limit: maximum number of IOTLB entries 104 + * @flags: VHOST_IOTLB_FLAG_XXX 105 + * 106 + * Returns an error is memory allocation fails 107 + */ 108 + struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags) 109 + { 110 + struct vhost_iotlb *iotlb = kzalloc(sizeof(*iotlb), GFP_KERNEL); 111 + 112 + if (!iotlb) 113 + return NULL; 114 + 115 + iotlb->root = RB_ROOT_CACHED; 116 + iotlb->limit = limit; 117 + iotlb->nmaps = 0; 118 + iotlb->flags = flags; 119 + INIT_LIST_HEAD(&iotlb->list); 120 + 121 + return iotlb; 122 + } 123 + EXPORT_SYMBOL_GPL(vhost_iotlb_alloc); 124 + 125 + /** 126 + * vhost_iotlb_reset - reset vhost IOTLB (free all IOTLB entries) 127 + * @iotlb: the IOTLB to be reset 128 + */ 129 + void vhost_iotlb_reset(struct vhost_iotlb *iotlb) 130 + { 131 + vhost_iotlb_del_range(iotlb, 0ULL, 0ULL - 1); 132 + } 133 + EXPORT_SYMBOL_GPL(vhost_iotlb_reset); 134 + 135 + /** 136 + * vhost_iotlb_free - reset and free vhost IOTLB 137 + * @iotlb: the IOTLB to be freed 138 + */ 139 + void vhost_iotlb_free(struct vhost_iotlb *iotlb) 140 + { 141 
+ if (iotlb) { 142 + vhost_iotlb_reset(iotlb); 143 + kfree(iotlb); 144 + } 145 + } 146 + EXPORT_SYMBOL_GPL(vhost_iotlb_free); 147 + 148 + /** 149 + * vhost_iotlb_itree_first - return the first overlapped range 150 + * @iotlb: the IOTLB 151 + * @start: start of IOVA range 152 + * @end: end of IOVA range 153 + */ 154 + struct vhost_iotlb_map * 155 + vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last) 156 + { 157 + return vhost_iotlb_itree_iter_first(&iotlb->root, start, last); 158 + } 159 + EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first); 160 + 161 + /** 162 + * vhost_iotlb_itree_first - return the next overlapped range 163 + * @iotlb: the IOTLB 164 + * @start: start of IOVA range 165 + * @end: end of IOVA range 166 + */ 167 + struct vhost_iotlb_map * 168 + vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last) 169 + { 170 + return vhost_iotlb_itree_iter_next(map, start, last); 171 + } 172 + EXPORT_SYMBOL_GPL(vhost_iotlb_itree_next); 173 + 174 + MODULE_VERSION(MOD_VERSION); 175 + MODULE_DESCRIPTION(MOD_DESC); 176 + MODULE_AUTHOR(MOD_AUTHOR); 177 + MODULE_LICENSE(MOD_LICENSE);
+3 -2
drivers/vhost/net.c
··· 1324 1324 } 1325 1325 vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, 1326 1326 UIO_MAXIOV + VHOST_NET_BATCH, 1327 - VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); 1327 + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, 1328 + NULL); 1328 1329 1329 1330 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); 1330 1331 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); ··· 1587 1586 struct socket *tx_sock = NULL; 1588 1587 struct socket *rx_sock = NULL; 1589 1588 long err; 1590 - struct vhost_umem *umem; 1589 + struct vhost_iotlb *umem; 1591 1590 1592 1591 mutex_lock(&n->dev.mutex); 1593 1592 err = vhost_dev_check_owner(&n->dev);
+1 -1
drivers/vhost/scsi.c
··· 1628 1628 vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; 1629 1629 } 1630 1630 vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, 1631 - VHOST_SCSI_WEIGHT, 0); 1631 + VHOST_SCSI_WEIGHT, 0, NULL); 1632 1632 1633 1633 vhost_scsi_init_inflight(vs, NULL); 1634 1634
+883
drivers/vhost/vdpa.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2018-2020 Intel Corporation. 4 + * Copyright (C) 2020 Red Hat, Inc. 5 + * 6 + * Author: Tiwei Bie <tiwei.bie@intel.com> 7 + * Jason Wang <jasowang@redhat.com> 8 + * 9 + * Thanks Michael S. Tsirkin for the valuable comments and 10 + * suggestions. And thanks to Cunming Liang and Zhihong Wang for all 11 + * their supports. 12 + */ 13 + 14 + #include <linux/kernel.h> 15 + #include <linux/module.h> 16 + #include <linux/cdev.h> 17 + #include <linux/device.h> 18 + #include <linux/iommu.h> 19 + #include <linux/uuid.h> 20 + #include <linux/vdpa.h> 21 + #include <linux/nospec.h> 22 + #include <linux/vhost.h> 23 + #include <linux/virtio_net.h> 24 + 25 + #include "vhost.h" 26 + 27 + enum { 28 + VHOST_VDPA_FEATURES = 29 + (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | 30 + (1ULL << VIRTIO_F_ANY_LAYOUT) | 31 + (1ULL << VIRTIO_F_VERSION_1) | 32 + (1ULL << VIRTIO_F_IOMMU_PLATFORM) | 33 + (1ULL << VIRTIO_F_RING_PACKED) | 34 + (1ULL << VIRTIO_F_ORDER_PLATFORM) | 35 + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | 36 + (1ULL << VIRTIO_RING_F_EVENT_IDX), 37 + 38 + VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES | 39 + (1ULL << VIRTIO_NET_F_CSUM) | 40 + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 41 + (1ULL << VIRTIO_NET_F_MTU) | 42 + (1ULL << VIRTIO_NET_F_MAC) | 43 + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 44 + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 45 + (1ULL << VIRTIO_NET_F_GUEST_ECN) | 46 + (1ULL << VIRTIO_NET_F_GUEST_UFO) | 47 + (1ULL << VIRTIO_NET_F_HOST_TSO4) | 48 + (1ULL << VIRTIO_NET_F_HOST_TSO6) | 49 + (1ULL << VIRTIO_NET_F_HOST_ECN) | 50 + (1ULL << VIRTIO_NET_F_HOST_UFO) | 51 + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | 52 + (1ULL << VIRTIO_NET_F_STATUS) | 53 + (1ULL << VIRTIO_NET_F_SPEED_DUPLEX), 54 + }; 55 + 56 + /* Currently, only network backend w/o multiqueue is supported. 
*/ 57 + #define VHOST_VDPA_VQ_MAX 2 58 + 59 + #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) 60 + 61 + struct vhost_vdpa { 62 + struct vhost_dev vdev; 63 + struct iommu_domain *domain; 64 + struct vhost_virtqueue *vqs; 65 + struct completion completion; 66 + struct vdpa_device *vdpa; 67 + struct device dev; 68 + struct cdev cdev; 69 + atomic_t opened; 70 + int nvqs; 71 + int virtio_id; 72 + int minor; 73 + }; 74 + 75 + static DEFINE_IDA(vhost_vdpa_ida); 76 + 77 + static dev_t vhost_vdpa_major; 78 + 79 + static const u64 vhost_vdpa_features[] = { 80 + [VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES, 81 + }; 82 + 83 + static void handle_vq_kick(struct vhost_work *work) 84 + { 85 + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 86 + poll.work); 87 + struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev); 88 + const struct vdpa_config_ops *ops = v->vdpa->config; 89 + 90 + ops->kick_vq(v->vdpa, vq - v->vqs); 91 + } 92 + 93 + static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) 94 + { 95 + struct vhost_virtqueue *vq = private; 96 + struct eventfd_ctx *call_ctx = vq->call_ctx; 97 + 98 + if (call_ctx) 99 + eventfd_signal(call_ctx, 1); 100 + 101 + return IRQ_HANDLED; 102 + } 103 + 104 + static void vhost_vdpa_reset(struct vhost_vdpa *v) 105 + { 106 + struct vdpa_device *vdpa = v->vdpa; 107 + const struct vdpa_config_ops *ops = vdpa->config; 108 + 109 + ops->set_status(vdpa, 0); 110 + } 111 + 112 + static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) 113 + { 114 + struct vdpa_device *vdpa = v->vdpa; 115 + const struct vdpa_config_ops *ops = vdpa->config; 116 + u32 device_id; 117 + 118 + device_id = ops->get_device_id(vdpa); 119 + 120 + if (copy_to_user(argp, &device_id, sizeof(device_id))) 121 + return -EFAULT; 122 + 123 + return 0; 124 + } 125 + 126 + static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) 127 + { 128 + struct vdpa_device *vdpa = v->vdpa; 129 + const struct vdpa_config_ops *ops 
= vdpa->config; 130 + u8 status; 131 + 132 + status = ops->get_status(vdpa); 133 + 134 + if (copy_to_user(statusp, &status, sizeof(status))) 135 + return -EFAULT; 136 + 137 + return 0; 138 + } 139 + 140 + static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) 141 + { 142 + struct vdpa_device *vdpa = v->vdpa; 143 + const struct vdpa_config_ops *ops = vdpa->config; 144 + u8 status; 145 + 146 + if (copy_from_user(&status, statusp, sizeof(status))) 147 + return -EFAULT; 148 + 149 + /* 150 + * Userspace shouldn't remove status bits unless reset the 151 + * status to 0. 152 + */ 153 + if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) 154 + return -EINVAL; 155 + 156 + ops->set_status(vdpa, status); 157 + 158 + return 0; 159 + } 160 + 161 + static int vhost_vdpa_config_validate(struct vhost_vdpa *v, 162 + struct vhost_vdpa_config *c) 163 + { 164 + long size = 0; 165 + 166 + switch (v->virtio_id) { 167 + case VIRTIO_ID_NET: 168 + size = sizeof(struct virtio_net_config); 169 + break; 170 + } 171 + 172 + if (c->len == 0) 173 + return -EINVAL; 174 + 175 + if (c->len > size - c->off) 176 + return -E2BIG; 177 + 178 + return 0; 179 + } 180 + 181 + static long vhost_vdpa_get_config(struct vhost_vdpa *v, 182 + struct vhost_vdpa_config __user *c) 183 + { 184 + struct vdpa_device *vdpa = v->vdpa; 185 + const struct vdpa_config_ops *ops = vdpa->config; 186 + struct vhost_vdpa_config config; 187 + unsigned long size = offsetof(struct vhost_vdpa_config, buf); 188 + u8 *buf; 189 + 190 + if (copy_from_user(&config, c, size)) 191 + return -EFAULT; 192 + if (vhost_vdpa_config_validate(v, &config)) 193 + return -EINVAL; 194 + buf = kvzalloc(config.len, GFP_KERNEL); 195 + if (!buf) 196 + return -ENOMEM; 197 + 198 + ops->get_config(vdpa, config.off, buf, config.len); 199 + 200 + if (copy_to_user(c->buf, buf, config.len)) { 201 + kvfree(buf); 202 + return -EFAULT; 203 + } 204 + 205 + kvfree(buf); 206 + return 0; 207 + } 208 + 209 + static long 
vhost_vdpa_set_config(struct vhost_vdpa *v, 210 + struct vhost_vdpa_config __user *c) 211 + { 212 + struct vdpa_device *vdpa = v->vdpa; 213 + const struct vdpa_config_ops *ops = vdpa->config; 214 + struct vhost_vdpa_config config; 215 + unsigned long size = offsetof(struct vhost_vdpa_config, buf); 216 + u8 *buf; 217 + 218 + if (copy_from_user(&config, c, size)) 219 + return -EFAULT; 220 + if (vhost_vdpa_config_validate(v, &config)) 221 + return -EINVAL; 222 + buf = kvzalloc(config.len, GFP_KERNEL); 223 + if (!buf) 224 + return -ENOMEM; 225 + 226 + if (copy_from_user(buf, c->buf, config.len)) { 227 + kvfree(buf); 228 + return -EFAULT; 229 + } 230 + 231 + ops->set_config(vdpa, config.off, buf, config.len); 232 + 233 + kvfree(buf); 234 + return 0; 235 + } 236 + 237 + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) 238 + { 239 + struct vdpa_device *vdpa = v->vdpa; 240 + const struct vdpa_config_ops *ops = vdpa->config; 241 + u64 features; 242 + 243 + features = ops->get_features(vdpa); 244 + features &= vhost_vdpa_features[v->virtio_id]; 245 + 246 + if (copy_to_user(featurep, &features, sizeof(features))) 247 + return -EFAULT; 248 + 249 + return 0; 250 + } 251 + 252 + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) 253 + { 254 + struct vdpa_device *vdpa = v->vdpa; 255 + const struct vdpa_config_ops *ops = vdpa->config; 256 + u64 features; 257 + 258 + /* 259 + * It's not allowed to change the features after they have 260 + * been negotiated. 
261 + */ 262 + if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) 263 + return -EBUSY; 264 + 265 + if (copy_from_user(&features, featurep, sizeof(features))) 266 + return -EFAULT; 267 + 268 + if (features & ~vhost_vdpa_features[v->virtio_id]) 269 + return -EINVAL; 270 + 271 + if (ops->set_features(vdpa, features)) 272 + return -EINVAL; 273 + 274 + return 0; 275 + } 276 + 277 + static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) 278 + { 279 + struct vdpa_device *vdpa = v->vdpa; 280 + const struct vdpa_config_ops *ops = vdpa->config; 281 + u16 num; 282 + 283 + num = ops->get_vq_num_max(vdpa); 284 + 285 + if (copy_to_user(argp, &num, sizeof(num))) 286 + return -EFAULT; 287 + 288 + return 0; 289 + } 290 + 291 + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, 292 + void __user *argp) 293 + { 294 + struct vdpa_device *vdpa = v->vdpa; 295 + const struct vdpa_config_ops *ops = vdpa->config; 296 + struct vdpa_callback cb; 297 + struct vhost_virtqueue *vq; 298 + struct vhost_vring_state s; 299 + u8 status; 300 + u32 idx; 301 + long r; 302 + 303 + r = get_user(idx, (u32 __user *)argp); 304 + if (r < 0) 305 + return r; 306 + 307 + if (idx >= v->nvqs) 308 + return -ENOBUFS; 309 + 310 + idx = array_index_nospec(idx, v->nvqs); 311 + vq = &v->vqs[idx]; 312 + 313 + status = ops->get_status(vdpa); 314 + 315 + if (cmd == VHOST_VDPA_SET_VRING_ENABLE) { 316 + if (copy_from_user(&s, argp, sizeof(s))) 317 + return -EFAULT; 318 + ops->set_vq_ready(vdpa, idx, s.num); 319 + return 0; 320 + } 321 + 322 + if (cmd == VHOST_GET_VRING_BASE) 323 + vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); 324 + 325 + r = vhost_vring_ioctl(&v->vdev, cmd, argp); 326 + if (r) 327 + return r; 328 + 329 + switch (cmd) { 330 + case VHOST_SET_VRING_ADDR: 331 + if (ops->set_vq_address(vdpa, idx, 332 + (u64)(uintptr_t)vq->desc, 333 + (u64)(uintptr_t)vq->avail, 334 + (u64)(uintptr_t)vq->used)) 335 + r = -EINVAL; 336 + break; 337 + 338 + case 
VHOST_SET_VRING_BASE: 339 + if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx)) 340 + r = -EINVAL; 341 + break; 342 + 343 + case VHOST_SET_VRING_CALL: 344 + if (vq->call_ctx) { 345 + cb.callback = vhost_vdpa_virtqueue_cb; 346 + cb.private = vq; 347 + } else { 348 + cb.callback = NULL; 349 + cb.private = NULL; 350 + } 351 + ops->set_vq_cb(vdpa, idx, &cb); 352 + break; 353 + 354 + case VHOST_SET_VRING_NUM: 355 + ops->set_vq_num(vdpa, idx, vq->num); 356 + break; 357 + } 358 + 359 + return r; 360 + } 361 + 362 + static long vhost_vdpa_unlocked_ioctl(struct file *filep, 363 + unsigned int cmd, unsigned long arg) 364 + { 365 + struct vhost_vdpa *v = filep->private_data; 366 + struct vhost_dev *d = &v->vdev; 367 + void __user *argp = (void __user *)arg; 368 + long r; 369 + 370 + mutex_lock(&d->mutex); 371 + 372 + switch (cmd) { 373 + case VHOST_VDPA_GET_DEVICE_ID: 374 + r = vhost_vdpa_get_device_id(v, argp); 375 + break; 376 + case VHOST_VDPA_GET_STATUS: 377 + r = vhost_vdpa_get_status(v, argp); 378 + break; 379 + case VHOST_VDPA_SET_STATUS: 380 + r = vhost_vdpa_set_status(v, argp); 381 + break; 382 + case VHOST_VDPA_GET_CONFIG: 383 + r = vhost_vdpa_get_config(v, argp); 384 + break; 385 + case VHOST_VDPA_SET_CONFIG: 386 + r = vhost_vdpa_set_config(v, argp); 387 + break; 388 + case VHOST_GET_FEATURES: 389 + r = vhost_vdpa_get_features(v, argp); 390 + break; 391 + case VHOST_SET_FEATURES: 392 + r = vhost_vdpa_set_features(v, argp); 393 + break; 394 + case VHOST_VDPA_GET_VRING_NUM: 395 + r = vhost_vdpa_get_vring_num(v, argp); 396 + break; 397 + case VHOST_SET_LOG_BASE: 398 + case VHOST_SET_LOG_FD: 399 + r = -ENOIOCTLCMD; 400 + break; 401 + default: 402 + r = vhost_dev_ioctl(&v->vdev, cmd, argp); 403 + if (r == -ENOIOCTLCMD) 404 + r = vhost_vdpa_vring_ioctl(v, cmd, argp); 405 + break; 406 + } 407 + 408 + mutex_unlock(&d->mutex); 409 + return r; 410 + } 411 + 412 + static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) 413 + { 414 + struct vhost_dev 
*dev = &v->vdev; 415 + struct vhost_iotlb *iotlb = dev->iotlb; 416 + struct vhost_iotlb_map *map; 417 + struct page *page; 418 + unsigned long pfn, pinned; 419 + 420 + while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { 421 + pinned = map->size >> PAGE_SHIFT; 422 + for (pfn = map->addr >> PAGE_SHIFT; 423 + pinned > 0; pfn++, pinned--) { 424 + page = pfn_to_page(pfn); 425 + if (map->perm & VHOST_ACCESS_WO) 426 + set_page_dirty_lock(page); 427 + unpin_user_page(page); 428 + } 429 + atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); 430 + vhost_iotlb_map_free(iotlb, map); 431 + } 432 + } 433 + 434 + static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) 435 + { 436 + struct vhost_dev *dev = &v->vdev; 437 + 438 + vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); 439 + kfree(dev->iotlb); 440 + dev->iotlb = NULL; 441 + } 442 + 443 + static int perm_to_iommu_flags(u32 perm) 444 + { 445 + int flags = 0; 446 + 447 + switch (perm) { 448 + case VHOST_ACCESS_WO: 449 + flags |= IOMMU_WRITE; 450 + break; 451 + case VHOST_ACCESS_RO: 452 + flags |= IOMMU_READ; 453 + break; 454 + case VHOST_ACCESS_RW: 455 + flags |= (IOMMU_WRITE | IOMMU_READ); 456 + break; 457 + default: 458 + WARN(1, "invalidate vhost IOTLB permission\n"); 459 + break; 460 + } 461 + 462 + return flags | IOMMU_CACHE; 463 + } 464 + 465 + static int vhost_vdpa_map(struct vhost_vdpa *v, 466 + u64 iova, u64 size, u64 pa, u32 perm) 467 + { 468 + struct vhost_dev *dev = &v->vdev; 469 + struct vdpa_device *vdpa = v->vdpa; 470 + const struct vdpa_config_ops *ops = vdpa->config; 471 + int r = 0; 472 + 473 + r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, 474 + pa, perm); 475 + if (r) 476 + return r; 477 + 478 + if (ops->dma_map) 479 + r = ops->dma_map(vdpa, iova, size, pa, perm); 480 + else if (ops->set_map) 481 + r = ops->set_map(vdpa, dev->iotlb); 482 + else 483 + r = iommu_map(v->domain, iova, pa, size, 484 + perm_to_iommu_flags(perm)); 485 + 486 + return r; 487 + } 488 + 489 + static 
void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) 490 + { 491 + struct vhost_dev *dev = &v->vdev; 492 + struct vdpa_device *vdpa = v->vdpa; 493 + const struct vdpa_config_ops *ops = vdpa->config; 494 + 495 + vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); 496 + 497 + if (ops->dma_map) 498 + ops->dma_unmap(vdpa, iova, size); 499 + else if (ops->set_map) 500 + ops->set_map(vdpa, dev->iotlb); 501 + else 502 + iommu_unmap(v->domain, iova, size); 503 + } 504 + 505 + static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, 506 + struct vhost_iotlb_msg *msg) 507 + { 508 + struct vhost_dev *dev = &v->vdev; 509 + struct vhost_iotlb *iotlb = dev->iotlb; 510 + struct page **page_list; 511 + unsigned long list_size = PAGE_SIZE / sizeof(struct page *); 512 + unsigned int gup_flags = FOLL_LONGTERM; 513 + unsigned long npages, cur_base, map_pfn, last_pfn = 0; 514 + unsigned long locked, lock_limit, pinned, i; 515 + u64 iova = msg->iova; 516 + int ret = 0; 517 + 518 + if (vhost_iotlb_itree_first(iotlb, msg->iova, 519 + msg->iova + msg->size - 1)) 520 + return -EEXIST; 521 + 522 + page_list = (struct page **) __get_free_page(GFP_KERNEL); 523 + if (!page_list) 524 + return -ENOMEM; 525 + 526 + if (msg->perm & VHOST_ACCESS_WO) 527 + gup_flags |= FOLL_WRITE; 528 + 529 + npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; 530 + if (!npages) 531 + return -EINVAL; 532 + 533 + down_read(&dev->mm->mmap_sem); 534 + 535 + locked = atomic64_add_return(npages, &dev->mm->pinned_vm); 536 + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 537 + 538 + if (locked > lock_limit) { 539 + ret = -ENOMEM; 540 + goto out; 541 + } 542 + 543 + cur_base = msg->uaddr & PAGE_MASK; 544 + iova &= PAGE_MASK; 545 + 546 + while (npages) { 547 + pinned = min_t(unsigned long, npages, list_size); 548 + ret = pin_user_pages(cur_base, pinned, 549 + gup_flags, page_list, NULL); 550 + if (ret != pinned) 551 + goto out; 552 + 553 + if (!last_pfn) 554 + map_pfn = 
page_to_pfn(page_list[0]); 555 + 556 + for (i = 0; i < ret; i++) { 557 + unsigned long this_pfn = page_to_pfn(page_list[i]); 558 + u64 csize; 559 + 560 + if (last_pfn && (this_pfn != last_pfn + 1)) { 561 + /* Pin a contiguous chunk of memory */ 562 + csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; 563 + if (vhost_vdpa_map(v, iova, csize, 564 + map_pfn << PAGE_SHIFT, 565 + msg->perm)) 566 + goto out; 567 + map_pfn = this_pfn; 568 + iova += csize; 569 + } 570 + 571 + last_pfn = this_pfn; 572 + } 573 + 574 + cur_base += ret << PAGE_SHIFT; 575 + npages -= ret; 576 + } 577 + 578 + /* Pin the rest chunk */ 579 + ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, 580 + map_pfn << PAGE_SHIFT, msg->perm); 581 + out: 582 + if (ret) { 583 + vhost_vdpa_unmap(v, msg->iova, msg->size); 584 + atomic64_sub(npages, &dev->mm->pinned_vm); 585 + } 586 + up_read(&dev->mm->mmap_sem); 587 + free_page((unsigned long)page_list); 588 + return ret; 589 + } 590 + 591 + static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, 592 + struct vhost_iotlb_msg *msg) 593 + { 594 + struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); 595 + int r = 0; 596 + 597 + r = vhost_dev_check_owner(dev); 598 + if (r) 599 + return r; 600 + 601 + switch (msg->type) { 602 + case VHOST_IOTLB_UPDATE: 603 + r = vhost_vdpa_process_iotlb_update(v, msg); 604 + break; 605 + case VHOST_IOTLB_INVALIDATE: 606 + vhost_vdpa_unmap(v, msg->iova, msg->size); 607 + break; 608 + default: 609 + r = -EINVAL; 610 + break; 611 + } 612 + 613 + return r; 614 + } 615 + 616 + static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, 617 + struct iov_iter *from) 618 + { 619 + struct file *file = iocb->ki_filp; 620 + struct vhost_vdpa *v = file->private_data; 621 + struct vhost_dev *dev = &v->vdev; 622 + 623 + return vhost_chr_write_iter(dev, from); 624 + } 625 + 626 + static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) 627 + { 628 + struct vdpa_device *vdpa = v->vdpa; 629 + const struct 
vdpa_config_ops *ops = vdpa->config; 630 + struct device *dma_dev = vdpa_get_dma_dev(vdpa); 631 + struct bus_type *bus; 632 + int ret; 633 + 634 + /* Device want to do DMA by itself */ 635 + if (ops->set_map || ops->dma_map) 636 + return 0; 637 + 638 + bus = dma_dev->bus; 639 + if (!bus) 640 + return -EFAULT; 641 + 642 + if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) 643 + return -ENOTSUPP; 644 + 645 + v->domain = iommu_domain_alloc(bus); 646 + if (!v->domain) 647 + return -EIO; 648 + 649 + ret = iommu_attach_device(v->domain, dma_dev); 650 + if (ret) 651 + goto err_attach; 652 + 653 + return 0; 654 + 655 + err_attach: 656 + iommu_domain_free(v->domain); 657 + return ret; 658 + } 659 + 660 + static void vhost_vdpa_free_domain(struct vhost_vdpa *v) 661 + { 662 + struct vdpa_device *vdpa = v->vdpa; 663 + struct device *dma_dev = vdpa_get_dma_dev(vdpa); 664 + 665 + if (v->domain) { 666 + iommu_detach_device(v->domain, dma_dev); 667 + iommu_domain_free(v->domain); 668 + } 669 + 670 + v->domain = NULL; 671 + } 672 + 673 + static int vhost_vdpa_open(struct inode *inode, struct file *filep) 674 + { 675 + struct vhost_vdpa *v; 676 + struct vhost_dev *dev; 677 + struct vhost_virtqueue **vqs; 678 + int nvqs, i, r, opened; 679 + 680 + v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); 681 + if (!v) 682 + return -ENODEV; 683 + 684 + opened = atomic_cmpxchg(&v->opened, 0, 1); 685 + if (opened) 686 + return -EBUSY; 687 + 688 + nvqs = v->nvqs; 689 + vhost_vdpa_reset(v); 690 + 691 + vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); 692 + if (!vqs) { 693 + r = -ENOMEM; 694 + goto err; 695 + } 696 + 697 + dev = &v->vdev; 698 + for (i = 0; i < nvqs; i++) { 699 + vqs[i] = &v->vqs[i]; 700 + vqs[i]->handle_kick = handle_vq_kick; 701 + } 702 + vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, 703 + vhost_vdpa_process_iotlb_msg); 704 + 705 + dev->iotlb = vhost_iotlb_alloc(0, 0); 706 + if (!dev->iotlb) { 707 + r = -ENOMEM; 708 + goto err_init_iotlb; 709 + } 710 + 711 + r = 
vhost_vdpa_alloc_domain(v); 712 + if (r) 713 + goto err_init_iotlb; 714 + 715 + filep->private_data = v; 716 + 717 + return 0; 718 + 719 + err_init_iotlb: 720 + vhost_dev_cleanup(&v->vdev); 721 + err: 722 + atomic_dec(&v->opened); 723 + return r; 724 + } 725 + 726 + static int vhost_vdpa_release(struct inode *inode, struct file *filep) 727 + { 728 + struct vhost_vdpa *v = filep->private_data; 729 + struct vhost_dev *d = &v->vdev; 730 + 731 + mutex_lock(&d->mutex); 732 + filep->private_data = NULL; 733 + vhost_vdpa_reset(v); 734 + vhost_dev_stop(&v->vdev); 735 + vhost_vdpa_iotlb_free(v); 736 + vhost_vdpa_free_domain(v); 737 + vhost_dev_cleanup(&v->vdev); 738 + kfree(v->vdev.vqs); 739 + mutex_unlock(&d->mutex); 740 + 741 + atomic_dec(&v->opened); 742 + complete(&v->completion); 743 + 744 + return 0; 745 + } 746 + 747 + static const struct file_operations vhost_vdpa_fops = { 748 + .owner = THIS_MODULE, 749 + .open = vhost_vdpa_open, 750 + .release = vhost_vdpa_release, 751 + .write_iter = vhost_vdpa_chr_write_iter, 752 + .unlocked_ioctl = vhost_vdpa_unlocked_ioctl, 753 + .compat_ioctl = compat_ptr_ioctl, 754 + }; 755 + 756 + static void vhost_vdpa_release_dev(struct device *device) 757 + { 758 + struct vhost_vdpa *v = 759 + container_of(device, struct vhost_vdpa, dev); 760 + 761 + ida_simple_remove(&vhost_vdpa_ida, v->minor); 762 + kfree(v->vqs); 763 + kfree(v); 764 + } 765 + 766 + static int vhost_vdpa_probe(struct vdpa_device *vdpa) 767 + { 768 + const struct vdpa_config_ops *ops = vdpa->config; 769 + struct vhost_vdpa *v; 770 + int minor, nvqs = VHOST_VDPA_VQ_MAX; 771 + int r; 772 + 773 + /* Currently, we only accept the network devices. 
*/ 774 + if (ops->get_device_id(vdpa) != VIRTIO_ID_NET) 775 + return -ENOTSUPP; 776 + 777 + v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 778 + if (!v) 779 + return -ENOMEM; 780 + 781 + minor = ida_simple_get(&vhost_vdpa_ida, 0, 782 + VHOST_VDPA_DEV_MAX, GFP_KERNEL); 783 + if (minor < 0) { 784 + kfree(v); 785 + return minor; 786 + } 787 + 788 + atomic_set(&v->opened, 0); 789 + v->minor = minor; 790 + v->vdpa = vdpa; 791 + v->nvqs = nvqs; 792 + v->virtio_id = ops->get_device_id(vdpa); 793 + 794 + device_initialize(&v->dev); 795 + v->dev.release = vhost_vdpa_release_dev; 796 + v->dev.parent = &vdpa->dev; 797 + v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); 798 + v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue), 799 + GFP_KERNEL); 800 + if (!v->vqs) { 801 + r = -ENOMEM; 802 + goto err; 803 + } 804 + 805 + r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor); 806 + if (r) 807 + goto err; 808 + 809 + cdev_init(&v->cdev, &vhost_vdpa_fops); 810 + v->cdev.owner = THIS_MODULE; 811 + 812 + r = cdev_device_add(&v->cdev, &v->dev); 813 + if (r) 814 + goto err; 815 + 816 + init_completion(&v->completion); 817 + vdpa_set_drvdata(vdpa, v); 818 + 819 + return 0; 820 + 821 + err: 822 + put_device(&v->dev); 823 + return r; 824 + } 825 + 826 + static void vhost_vdpa_remove(struct vdpa_device *vdpa) 827 + { 828 + struct vhost_vdpa *v = vdpa_get_drvdata(vdpa); 829 + int opened; 830 + 831 + cdev_device_del(&v->cdev, &v->dev); 832 + 833 + do { 834 + opened = atomic_cmpxchg(&v->opened, 0, 1); 835 + if (!opened) 836 + break; 837 + wait_for_completion(&v->completion); 838 + } while (1); 839 + 840 + put_device(&v->dev); 841 + } 842 + 843 + static struct vdpa_driver vhost_vdpa_driver = { 844 + .driver = { 845 + .name = "vhost_vdpa", 846 + }, 847 + .probe = vhost_vdpa_probe, 848 + .remove = vhost_vdpa_remove, 849 + }; 850 + 851 + static int __init vhost_vdpa_init(void) 852 + { 853 + int r; 854 + 855 + r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX, 
856 + "vhost-vdpa"); 857 + if (r) 858 + goto err_alloc_chrdev; 859 + 860 + r = vdpa_register_driver(&vhost_vdpa_driver); 861 + if (r) 862 + goto err_vdpa_register_driver; 863 + 864 + return 0; 865 + 866 + err_vdpa_register_driver: 867 + unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); 868 + err_alloc_chrdev: 869 + return r; 870 + } 871 + module_init(vhost_vdpa_init); 872 + 873 + static void __exit vhost_vdpa_exit(void) 874 + { 875 + vdpa_unregister_driver(&vhost_vdpa_driver); 876 + unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); 877 + } 878 + module_exit(vhost_vdpa_exit); 879 + 880 + MODULE_VERSION("0.0.1"); 881 + MODULE_LICENSE("GPL v2"); 882 + MODULE_AUTHOR("Intel Corporation"); 883 + MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
+79 -154
drivers/vhost/vhost.c
··· 50 50 #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) 51 51 #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) 52 52 53 - INTERVAL_TREE_DEFINE(struct vhost_umem_node, 54 - rb, __u64, __subtree_last, 55 - START, LAST, static inline, vhost_umem_interval_tree); 56 - 57 53 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY 58 54 static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) 59 55 { ··· 453 457 454 458 void vhost_dev_init(struct vhost_dev *dev, 455 459 struct vhost_virtqueue **vqs, int nvqs, 456 - int iov_limit, int weight, int byte_weight) 460 + int iov_limit, int weight, int byte_weight, 461 + int (*msg_handler)(struct vhost_dev *dev, 462 + struct vhost_iotlb_msg *msg)) 457 463 { 458 464 struct vhost_virtqueue *vq; 459 465 int i; ··· 471 473 dev->iov_limit = iov_limit; 472 474 dev->weight = weight; 473 475 dev->byte_weight = byte_weight; 476 + dev->msg_handler = msg_handler; 474 477 init_llist_head(&dev->work_list); 475 478 init_waitqueue_head(&dev->wait); 476 479 INIT_LIST_HEAD(&dev->read_list); ··· 580 581 } 581 582 EXPORT_SYMBOL_GPL(vhost_dev_set_owner); 582 583 583 - struct vhost_umem *vhost_dev_reset_owner_prepare(void) 584 + static struct vhost_iotlb *iotlb_alloc(void) 584 585 { 585 - return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL); 586 + return vhost_iotlb_alloc(max_iotlb_entries, 587 + VHOST_IOTLB_FLAG_RETIRE); 588 + } 589 + 590 + struct vhost_iotlb *vhost_dev_reset_owner_prepare(void) 591 + { 592 + return iotlb_alloc(); 586 593 } 587 594 EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); 588 595 589 596 /* Caller should have device mutex */ 590 - void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) 597 + void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) 591 598 { 592 599 int i; 593 600 594 601 vhost_dev_cleanup(dev); 595 602 596 - /* Restore memory to default empty mapping. 
*/ 597 - INIT_LIST_HEAD(&umem->umem_list); 598 603 dev->umem = umem; 599 604 /* We don't need VQ locks below since vhost_dev_cleanup makes sure 600 605 * VQs aren't running. ··· 620 617 } 621 618 } 622 619 EXPORT_SYMBOL_GPL(vhost_dev_stop); 623 - 624 - static void vhost_umem_free(struct vhost_umem *umem, 625 - struct vhost_umem_node *node) 626 - { 627 - vhost_umem_interval_tree_remove(node, &umem->umem_tree); 628 - list_del(&node->link); 629 - kfree(node); 630 - umem->numem--; 631 - } 632 - 633 - static void vhost_umem_clean(struct vhost_umem *umem) 634 - { 635 - struct vhost_umem_node *node, *tmp; 636 - 637 - if (!umem) 638 - return; 639 - 640 - list_for_each_entry_safe(node, tmp, &umem->umem_list, link) 641 - vhost_umem_free(umem, node); 642 - 643 - kvfree(umem); 644 - } 645 620 646 621 static void vhost_clear_msg(struct vhost_dev *dev) 647 622 { ··· 658 677 eventfd_ctx_put(dev->log_ctx); 659 678 dev->log_ctx = NULL; 660 679 /* No one will access memory at this point */ 661 - vhost_umem_clean(dev->umem); 680 + vhost_iotlb_free(dev->umem); 662 681 dev->umem = NULL; 663 - vhost_umem_clean(dev->iotlb); 682 + vhost_iotlb_free(dev->iotlb); 664 683 dev->iotlb = NULL; 665 684 vhost_clear_msg(dev); 666 685 wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); ··· 696 715 } 697 716 698 717 /* Caller should have vq mutex and device mutex. 
*/ 699 - static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, 718 + static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, 700 719 int log_all) 701 720 { 702 - struct vhost_umem_node *node; 721 + struct vhost_iotlb_map *map; 703 722 704 723 if (!umem) 705 724 return false; 706 725 707 - list_for_each_entry(node, &umem->umem_list, link) { 708 - unsigned long a = node->userspace_addr; 726 + list_for_each_entry(map, &umem->list, link) { 727 + unsigned long a = map->addr; 709 728 710 - if (vhost_overflow(node->userspace_addr, node->size)) 729 + if (vhost_overflow(map->addr, map->size)) 711 730 return false; 712 731 713 732 714 - if (!access_ok((void __user *)a, 715 - node->size)) 733 + if (!access_ok((void __user *)a, map->size)) 716 734 return false; 717 735 else if (log_all && !log_access_ok(log_base, 718 - node->start, 719 - node->size)) 736 + map->start, 737 + map->size)) 720 738 return false; 721 739 } 722 740 return true; ··· 725 745 u64 addr, unsigned int size, 726 746 int type) 727 747 { 728 - const struct vhost_umem_node *node = vq->meta_iotlb[type]; 748 + const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; 729 749 730 - if (!node) 750 + if (!map) 731 751 return NULL; 732 752 733 - return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); 753 + return (void *)(uintptr_t)(map->addr + addr - map->start); 734 754 } 735 755 736 756 /* Can we switch to this memory table? 
*/ 737 757 /* Caller should have device mutex but not vq mutex */ 738 - static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, 758 + static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, 739 759 int log_all) 740 760 { 741 761 int i; ··· 1000 1020 return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); 1001 1021 } 1002 1022 1003 - static int vhost_new_umem_range(struct vhost_umem *umem, 1004 - u64 start, u64 size, u64 end, 1005 - u64 userspace_addr, int perm) 1006 - { 1007 - struct vhost_umem_node *tmp, *node; 1008 - 1009 - if (!size) 1010 - return -EFAULT; 1011 - 1012 - node = kmalloc(sizeof(*node), GFP_ATOMIC); 1013 - if (!node) 1014 - return -ENOMEM; 1015 - 1016 - if (umem->numem == max_iotlb_entries) { 1017 - tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); 1018 - vhost_umem_free(umem, tmp); 1019 - } 1020 - 1021 - node->start = start; 1022 - node->size = size; 1023 - node->last = end; 1024 - node->userspace_addr = userspace_addr; 1025 - node->perm = perm; 1026 - INIT_LIST_HEAD(&node->link); 1027 - list_add_tail(&node->link, &umem->umem_list); 1028 - vhost_umem_interval_tree_insert(node, &umem->umem_tree); 1029 - umem->numem++; 1030 - 1031 - return 0; 1032 - } 1033 - 1034 - static void vhost_del_umem_range(struct vhost_umem *umem, 1035 - u64 start, u64 end) 1036 - { 1037 - struct vhost_umem_node *node; 1038 - 1039 - while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, 1040 - start, end))) 1041 - vhost_umem_free(umem, node); 1042 - } 1043 - 1044 1023 static void vhost_iotlb_notify_vq(struct vhost_dev *d, 1045 1024 struct vhost_iotlb_msg *msg) 1046 1025 { ··· 1056 1117 break; 1057 1118 } 1058 1119 vhost_vq_meta_reset(dev); 1059 - if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, 1060 - msg->iova + msg->size - 1, 1061 - msg->uaddr, msg->perm)) { 1120 + if (vhost_iotlb_add_range(dev->iotlb, msg->iova, 1121 + msg->iova + msg->size - 1, 1122 + msg->uaddr, msg->perm)) { 1062 
1123 ret = -ENOMEM; 1063 1124 break; 1064 1125 } ··· 1070 1131 break; 1071 1132 } 1072 1133 vhost_vq_meta_reset(dev); 1073 - vhost_del_umem_range(dev->iotlb, msg->iova, 1074 - msg->iova + msg->size - 1); 1134 + vhost_iotlb_del_range(dev->iotlb, msg->iova, 1135 + msg->iova + msg->size - 1); 1075 1136 break; 1076 1137 default: 1077 1138 ret = -EINVAL; ··· 1117 1178 ret = -EINVAL; 1118 1179 goto done; 1119 1180 } 1120 - if (vhost_process_iotlb_msg(dev, &msg)) { 1181 + 1182 + if (dev->msg_handler) 1183 + ret = dev->msg_handler(dev, &msg); 1184 + else 1185 + ret = vhost_process_iotlb_msg(dev, &msg); 1186 + if (ret) { 1121 1187 ret = -EFAULT; 1122 1188 goto done; 1123 1189 } ··· 1255 1311 } 1256 1312 1257 1313 static void vhost_vq_meta_update(struct vhost_virtqueue *vq, 1258 - const struct vhost_umem_node *node, 1314 + const struct vhost_iotlb_map *map, 1259 1315 int type) 1260 1316 { 1261 1317 int access = (type == VHOST_ADDR_USED) ? 1262 1318 VHOST_ACCESS_WO : VHOST_ACCESS_RO; 1263 1319 1264 - if (likely(node->perm & access)) 1265 - vq->meta_iotlb[type] = node; 1320 + if (likely(map->perm & access)) 1321 + vq->meta_iotlb[type] = map; 1266 1322 } 1267 1323 1268 1324 static bool iotlb_access_ok(struct vhost_virtqueue *vq, 1269 1325 int access, u64 addr, u64 len, int type) 1270 1326 { 1271 - const struct vhost_umem_node *node; 1272 - struct vhost_umem *umem = vq->iotlb; 1327 + const struct vhost_iotlb_map *map; 1328 + struct vhost_iotlb *umem = vq->iotlb; 1273 1329 u64 s = 0, size, orig_addr = addr, last = addr + len - 1; 1274 1330 1275 1331 if (vhost_vq_meta_fetch(vq, addr, len, type)) 1276 1332 return true; 1277 1333 1278 1334 while (len > s) { 1279 - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, 1280 - addr, 1281 - last); 1282 - if (node == NULL || node->start > addr) { 1335 + map = vhost_iotlb_itree_first(umem, addr, last); 1336 + if (map == NULL || map->start > addr) { 1283 1337 vhost_iotlb_miss(vq, addr, access); 1284 1338 return false; 1285 - } else 
if (!(node->perm & access)) { 1339 + } else if (!(map->perm & access)) { 1286 1340 /* Report the possible access violation by 1287 1341 * request another translation from userspace. 1288 1342 */ 1289 1343 return false; 1290 1344 } 1291 1345 1292 - size = node->size - addr + node->start; 1346 + size = map->size - addr + map->start; 1293 1347 1294 1348 if (orig_addr == addr && size >= len) 1295 - vhost_vq_meta_update(vq, node, type); 1349 + vhost_vq_meta_update(vq, map, type); 1296 1350 1297 1351 s += size; 1298 1352 addr += size; ··· 1306 1364 if (!vq->iotlb) 1307 1365 return 1; 1308 1366 1309 - return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, 1367 + return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, 1310 1368 vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && 1311 - iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, 1369 + iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, 1312 1370 vhost_get_avail_size(vq, num), 1313 1371 VHOST_ADDR_AVAIL) && 1314 - iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, 1372 + iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, 1315 1373 vhost_get_used_size(vq, num), VHOST_ADDR_USED); 1316 1374 } 1317 1375 EXPORT_SYMBOL_GPL(vq_meta_prefetch); ··· 1350 1408 } 1351 1409 EXPORT_SYMBOL_GPL(vhost_vq_access_ok); 1352 1410 1353 - static struct vhost_umem *vhost_umem_alloc(void) 1354 - { 1355 - struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL); 1356 - 1357 - if (!umem) 1358 - return NULL; 1359 - 1360 - umem->umem_tree = RB_ROOT_CACHED; 1361 - umem->numem = 0; 1362 - INIT_LIST_HEAD(&umem->umem_list); 1363 - 1364 - return umem; 1365 - } 1366 - 1367 1411 static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) 1368 1412 { 1369 1413 struct vhost_memory mem, *newmem; 1370 1414 struct vhost_memory_region *region; 1371 - struct vhost_umem *newumem, *oldumem; 1415 + struct vhost_iotlb *newumem, *oldumem; 1372 1416 unsigned long size = 
offsetof(struct vhost_memory, regions); 1373 1417 int i; 1374 1418 ··· 1376 1448 return -EFAULT; 1377 1449 } 1378 1450 1379 - newumem = vhost_umem_alloc(); 1451 + newumem = iotlb_alloc(); 1380 1452 if (!newumem) { 1381 1453 kvfree(newmem); 1382 1454 return -ENOMEM; ··· 1385 1457 for (region = newmem->regions; 1386 1458 region < newmem->regions + mem.nregions; 1387 1459 region++) { 1388 - if (vhost_new_umem_range(newumem, 1389 - region->guest_phys_addr, 1390 - region->memory_size, 1391 - region->guest_phys_addr + 1392 - region->memory_size - 1, 1393 - region->userspace_addr, 1394 - VHOST_ACCESS_RW)) 1460 + if (vhost_iotlb_add_range(newumem, 1461 + region->guest_phys_addr, 1462 + region->guest_phys_addr + 1463 + region->memory_size - 1, 1464 + region->userspace_addr, 1465 + VHOST_MAP_RW)) 1395 1466 goto err; 1396 1467 } 1397 1468 ··· 1408 1481 } 1409 1482 1410 1483 kvfree(newmem); 1411 - vhost_umem_clean(oldumem); 1484 + vhost_iotlb_free(oldumem); 1412 1485 return 0; 1413 1486 1414 1487 err: 1415 - vhost_umem_clean(newumem); 1488 + vhost_iotlb_free(newumem); 1416 1489 kvfree(newmem); 1417 1490 return -EFAULT; 1418 1491 } ··· 1653 1726 1654 1727 int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) 1655 1728 { 1656 - struct vhost_umem *niotlb, *oiotlb; 1729 + struct vhost_iotlb *niotlb, *oiotlb; 1657 1730 int i; 1658 1731 1659 - niotlb = vhost_umem_alloc(); 1732 + niotlb = iotlb_alloc(); 1660 1733 if (!niotlb) 1661 1734 return -ENOMEM; 1662 1735 ··· 1672 1745 mutex_unlock(&vq->mutex); 1673 1746 } 1674 1747 1675 - vhost_umem_clean(oiotlb); 1748 + vhost_iotlb_free(oiotlb); 1676 1749 1677 1750 return 0; 1678 1751 } ··· 1802 1875 1803 1876 static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) 1804 1877 { 1805 - struct vhost_umem *umem = vq->umem; 1806 - struct vhost_umem_node *u; 1878 + struct vhost_iotlb *umem = vq->umem; 1879 + struct vhost_iotlb_map *u; 1807 1880 u64 start, end, l, min; 1808 1881 int r; 1809 1882 bool hit = false; ··· 1813 
1886 /* More than one GPAs can be mapped into a single HVA. So 1814 1887 * iterate all possible umems here to be safe. 1815 1888 */ 1816 - list_for_each_entry(u, &umem->umem_list, link) { 1817 - if (u->userspace_addr > hva - 1 + len || 1818 - u->userspace_addr - 1 + u->size < hva) 1889 + list_for_each_entry(u, &umem->list, link) { 1890 + if (u->addr > hva - 1 + len || 1891 + u->addr - 1 + u->size < hva) 1819 1892 continue; 1820 - start = max(u->userspace_addr, hva); 1821 - end = min(u->userspace_addr - 1 + u->size, 1822 - hva - 1 + len); 1893 + start = max(u->addr, hva); 1894 + end = min(u->addr - 1 + u->size, hva - 1 + len); 1823 1895 l = end - start + 1; 1824 1896 r = log_write(vq->log_base, 1825 - u->start + start - u->userspace_addr, 1897 + u->start + start - u->addr, 1826 1898 l); 1827 1899 if (r < 0) 1828 1900 return r; ··· 1972 2046 static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, 1973 2047 struct iovec iov[], int iov_size, int access) 1974 2048 { 1975 - const struct vhost_umem_node *node; 2049 + const struct vhost_iotlb_map *map; 1976 2050 struct vhost_dev *dev = vq->dev; 1977 - struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; 2051 + struct vhost_iotlb *umem = dev->iotlb ? 
dev->iotlb : dev->umem; 1978 2052 struct iovec *_iov; 1979 2053 u64 s = 0; 1980 2054 int ret = 0; ··· 1986 2060 break; 1987 2061 } 1988 2062 1989 - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, 1990 - addr, addr + len - 1); 1991 - if (node == NULL || node->start > addr) { 2063 + map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); 2064 + if (map == NULL || map->start > addr) { 1992 2065 if (umem != dev->iotlb) { 1993 2066 ret = -EFAULT; 1994 2067 break; 1995 2068 } 1996 2069 ret = -EAGAIN; 1997 2070 break; 1998 - } else if (!(node->perm & access)) { 2071 + } else if (!(map->perm & access)) { 1999 2072 ret = -EPERM; 2000 2073 break; 2001 2074 } 2002 2075 2003 2076 _iov = iov + ret; 2004 - size = node->size - addr + node->start; 2077 + size = map->size - addr + map->start; 2005 2078 _iov->iov_len = min((u64)len - s, size); 2006 2079 _iov->iov_base = (void __user *)(unsigned long) 2007 - (node->userspace_addr + addr - node->start); 2080 + (map->addr + addr - map->start); 2008 2081 s += size; 2009 2082 addr += size; 2010 2083 ++ret;
+16 -29
drivers/vhost/vhost.h
··· 12 12 #include <linux/virtio_config.h> 13 13 #include <linux/virtio_ring.h> 14 14 #include <linux/atomic.h> 15 + #include <linux/vhost_iotlb.h> 15 16 16 17 struct vhost_work; 17 18 typedef void (*vhost_work_fn_t)(struct vhost_work *work); ··· 53 52 u64 len; 54 53 }; 55 54 56 - #define START(node) ((node)->start) 57 - #define LAST(node) ((node)->last) 58 - 59 - struct vhost_umem_node { 60 - struct rb_node rb; 61 - struct list_head link; 62 - __u64 start; 63 - __u64 last; 64 - __u64 size; 65 - __u64 userspace_addr; 66 - __u32 perm; 67 - __u32 flags_padding; 68 - __u64 __subtree_last; 69 - }; 70 - 71 - struct vhost_umem { 72 - struct rb_root_cached umem_tree; 73 - struct list_head umem_list; 74 - int numem; 75 - }; 76 - 77 55 enum vhost_uaddr_type { 78 56 VHOST_ADDR_DESC = 0, 79 57 VHOST_ADDR_AVAIL = 1, ··· 70 90 struct vring_desc __user *desc; 71 91 struct vring_avail __user *avail; 72 92 struct vring_used __user *used; 73 - const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; 93 + const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; 74 94 struct file *kick; 75 95 struct eventfd_ctx *call_ctx; 76 96 struct eventfd_ctx *error_ctx; ··· 108 128 struct iovec *indirect; 109 129 struct vring_used_elem *heads; 110 130 /* Protected by virtqueue mutex. 
*/ 111 - struct vhost_umem *umem; 112 - struct vhost_umem *iotlb; 131 + struct vhost_iotlb *umem; 132 + struct vhost_iotlb *iotlb; 113 133 void *private_data; 114 134 u64 acked_features; 115 135 u64 acked_backend_features; ··· 144 164 struct eventfd_ctx *log_ctx; 145 165 struct llist_head work_list; 146 166 struct task_struct *worker; 147 - struct vhost_umem *umem; 148 - struct vhost_umem *iotlb; 167 + struct vhost_iotlb *umem; 168 + struct vhost_iotlb *iotlb; 149 169 spinlock_t iotlb_lock; 150 170 struct list_head read_list; 151 171 struct list_head pending_list; ··· 154 174 int weight; 155 175 int byte_weight; 156 176 u64 kcov_handle; 177 + int (*msg_handler)(struct vhost_dev *dev, 178 + struct vhost_iotlb_msg *msg); 157 179 }; 158 180 159 181 bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); 160 182 void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, 161 - int nvqs, int iov_limit, int weight, int byte_weight); 183 + int nvqs, int iov_limit, int weight, int byte_weight, 184 + int (*msg_handler)(struct vhost_dev *dev, 185 + struct vhost_iotlb_msg *msg)); 162 186 long vhost_dev_set_owner(struct vhost_dev *dev); 163 187 bool vhost_dev_has_owner(struct vhost_dev *dev); 164 188 long vhost_dev_check_owner(struct vhost_dev *); 165 - struct vhost_umem *vhost_dev_reset_owner_prepare(void); 166 - void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); 189 + struct vhost_iotlb *vhost_dev_reset_owner_prepare(void); 190 + void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *iotlb); 167 191 void vhost_dev_cleanup(struct vhost_dev *); 168 192 void vhost_dev_stop(struct vhost_dev *); 169 193 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); ··· 212 228 ssize_t vhost_chr_write_iter(struct vhost_dev *dev, 213 229 struct iov_iter *from); 214 230 int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); 231 + 232 + void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, 
233 + struct vhost_iotlb_map *map); 215 234 216 235 #define vq_err(vq, fmt, ...) do { \ 217 236 pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+398 -23
drivers/vhost/vringh.c
··· 13 13 #include <linux/uaccess.h> 14 14 #include <linux/slab.h> 15 15 #include <linux/export.h> 16 + #include <linux/bvec.h> 17 + #include <linux/highmem.h> 18 + #include <linux/vhost_iotlb.h> 16 19 #include <uapi/linux/virtio_config.h> 17 20 18 21 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) ··· 74 71 } 75 72 76 73 /* Copy some bytes to/from the iovec. Returns num copied. */ 77 - static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov, 74 + static inline ssize_t vringh_iov_xfer(struct vringh *vrh, 75 + struct vringh_kiov *iov, 78 76 void *ptr, size_t len, 79 - int (*xfer)(void *addr, void *ptr, 77 + int (*xfer)(const struct vringh *vrh, 78 + void *addr, void *ptr, 80 79 size_t len)) 81 80 { 82 81 int err, done = 0; ··· 87 82 size_t partlen; 88 83 89 84 partlen = min(iov->iov[iov->i].iov_len, len); 90 - err = xfer(iov->iov[iov->i].iov_base, ptr, partlen); 85 + err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen); 91 86 if (err) 92 87 return err; 93 88 done += partlen; ··· 101 96 /* Fix up old iov element then increment. 
*/ 102 97 iov->iov[iov->i].iov_len = iov->consumed; 103 98 iov->iov[iov->i].iov_base -= iov->consumed; 99 + 104 100 105 101 iov->consumed = 0; 106 102 iov->i++; ··· 233 227 u64 addr, 234 228 struct vringh_range *r), 235 229 struct vringh_range *range, 236 - int (*copy)(void *dst, const void *src, size_t len)) 230 + int (*copy)(const struct vringh *vrh, 231 + void *dst, const void *src, size_t len)) 237 232 { 238 233 size_t part, len = sizeof(struct vring_desc); 239 234 ··· 248 241 if (!rcheck(vrh, addr, &part, range, getrange)) 249 242 return -EINVAL; 250 243 251 - err = copy(dst, src, part); 244 + err = copy(vrh, dst, src, part); 252 245 if (err) 253 246 return err; 254 247 ··· 269 262 struct vringh_range *)), 270 263 bool (*getrange)(struct vringh *, u64, struct vringh_range *), 271 264 gfp_t gfp, 272 - int (*copy)(void *dst, const void *src, size_t len)) 265 + int (*copy)(const struct vringh *vrh, 266 + void *dst, const void *src, size_t len)) 273 267 { 274 268 int err, count = 0, up_next, desc_max; 275 269 struct vring_desc desc, *descs; ··· 299 291 err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange, 300 292 &slowrange, copy); 301 293 else 302 - err = copy(&desc, &descs[i], sizeof(desc)); 294 + err = copy(vrh, &desc, &descs[i], sizeof(desc)); 303 295 if (unlikely(err)) 304 296 goto fail; 305 297 ··· 412 404 unsigned int num_used, 413 405 int (*putu16)(const struct vringh *vrh, 414 406 __virtio16 *p, u16 val), 415 - int (*putused)(struct vring_used_elem *dst, 407 + int (*putused)(const struct vringh *vrh, 408 + struct vring_used_elem *dst, 416 409 const struct vring_used_elem 417 410 *src, unsigned num)) 418 411 { ··· 429 420 /* Compiler knows num_used == 1 sometimes, hence extra check */ 430 421 if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) { 431 422 u16 part = vrh->vring.num - off; 432 - err = putused(&used_ring->ring[off], used, part); 423 + err = putused(vrh, &used_ring->ring[off], used, part); 433 424 if (!err) 434 - err = 
putused(&used_ring->ring[0], used + part, 425 + err = putused(vrh, &used_ring->ring[0], used + part, 435 426 num_used - part); 436 427 } else 437 - err = putused(&used_ring->ring[off], used, num_used); 428 + err = putused(vrh, &used_ring->ring[off], used, num_used); 438 429 439 430 if (err) { 440 431 vringh_bad("Failed to write %u used entries %u at %p", ··· 573 564 return put_user(v, (__force __virtio16 __user *)p); 574 565 } 575 566 576 - static inline int copydesc_user(void *dst, const void *src, size_t len) 567 + static inline int copydesc_user(const struct vringh *vrh, 568 + void *dst, const void *src, size_t len) 577 569 { 578 570 return copy_from_user(dst, (__force void __user *)src, len) ? 579 571 -EFAULT : 0; 580 572 } 581 573 582 - static inline int putused_user(struct vring_used_elem *dst, 574 + static inline int putused_user(const struct vringh *vrh, 575 + struct vring_used_elem *dst, 583 576 const struct vring_used_elem *src, 584 577 unsigned int num) 585 578 { ··· 589 578 sizeof(*dst) * num) ? -EFAULT : 0; 590 579 } 591 580 592 - static inline int xfer_from_user(void *src, void *dst, size_t len) 581 + static inline int xfer_from_user(const struct vringh *vrh, void *src, 582 + void *dst, size_t len) 593 583 { 594 584 return copy_from_user(dst, (__force void __user *)src, len) ? 595 585 -EFAULT : 0; 596 586 } 597 587 598 - static inline int xfer_to_user(void *dst, void *src, size_t len) 588 + static inline int xfer_to_user(const struct vringh *vrh, 589 + void *dst, void *src, size_t len) 599 590 { 600 591 return copy_to_user((__force void __user *)dst, src, len) ? 
601 592 -EFAULT : 0; ··· 719 706 */ 720 707 ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len) 721 708 { 722 - return vringh_iov_xfer((struct vringh_kiov *)riov, 709 + return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov, 723 710 dst, len, xfer_from_user); 724 711 } 725 712 EXPORT_SYMBOL(vringh_iov_pull_user); ··· 735 722 ssize_t vringh_iov_push_user(struct vringh_iov *wiov, 736 723 const void *src, size_t len) 737 724 { 738 - return vringh_iov_xfer((struct vringh_kiov *)wiov, 725 + return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov, 739 726 (void *)src, len, xfer_to_user); 740 727 } 741 728 EXPORT_SYMBOL(vringh_iov_push_user); ··· 845 832 return 0; 846 833 } 847 834 848 - static inline int copydesc_kern(void *dst, const void *src, size_t len) 835 + static inline int copydesc_kern(const struct vringh *vrh, 836 + void *dst, const void *src, size_t len) 849 837 { 850 838 memcpy(dst, src, len); 851 839 return 0; 852 840 } 853 841 854 - static inline int putused_kern(struct vring_used_elem *dst, 842 + static inline int putused_kern(const struct vringh *vrh, 843 + struct vring_used_elem *dst, 855 844 const struct vring_used_elem *src, 856 845 unsigned int num) 857 846 { ··· 861 846 return 0; 862 847 } 863 848 864 - static inline int xfer_kern(void *src, void *dst, size_t len) 849 + static inline int xfer_kern(const struct vringh *vrh, void *src, 850 + void *dst, size_t len) 865 851 { 866 852 memcpy(dst, src, len); 867 853 return 0; 868 854 } 869 855 870 - static inline int kern_xfer(void *dst, void *src, size_t len) 856 + static inline int kern_xfer(const struct vringh *vrh, void *dst, 857 + void *src, size_t len) 871 858 { 872 859 memcpy(dst, src, len); 873 860 return 0; ··· 966 949 */ 967 950 ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) 968 951 { 969 - return vringh_iov_xfer(riov, dst, len, xfer_kern); 952 + return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern); 970 953 } 971 954 
EXPORT_SYMBOL(vringh_iov_pull_kern); 972 955 ··· 981 964 ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, 982 965 const void *src, size_t len) 983 966 { 984 - return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer); 967 + return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer); 985 968 } 986 969 EXPORT_SYMBOL(vringh_iov_push_kern); 987 970 ··· 1058 1041 return __vringh_need_notify(vrh, getu16_kern); 1059 1042 } 1060 1043 EXPORT_SYMBOL(vringh_need_notify_kern); 1044 + 1045 + static int iotlb_translate(const struct vringh *vrh, 1046 + u64 addr, u64 len, struct bio_vec iov[], 1047 + int iov_size, u32 perm) 1048 + { 1049 + struct vhost_iotlb_map *map; 1050 + struct vhost_iotlb *iotlb = vrh->iotlb; 1051 + int ret = 0; 1052 + u64 s = 0; 1053 + 1054 + while (len > s) { 1055 + u64 size, pa, pfn; 1056 + 1057 + if (unlikely(ret >= iov_size)) { 1058 + ret = -ENOBUFS; 1059 + break; 1060 + } 1061 + 1062 + map = vhost_iotlb_itree_first(iotlb, addr, 1063 + addr + len - 1); 1064 + if (!map || map->start > addr) { 1065 + ret = -EINVAL; 1066 + break; 1067 + } else if (!(map->perm & perm)) { 1068 + ret = -EPERM; 1069 + break; 1070 + } 1071 + 1072 + size = map->size - addr + map->start; 1073 + pa = map->addr + addr - map->start; 1074 + pfn = pa >> PAGE_SHIFT; 1075 + iov[ret].bv_page = pfn_to_page(pfn); 1076 + iov[ret].bv_len = min(len - s, size); 1077 + iov[ret].bv_offset = pa & (PAGE_SIZE - 1); 1078 + s += size; 1079 + addr += size; 1080 + ++ret; 1081 + } 1082 + 1083 + return ret; 1084 + } 1085 + 1086 + static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, 1087 + void *src, size_t len) 1088 + { 1089 + struct iov_iter iter; 1090 + struct bio_vec iov[16]; 1091 + int ret; 1092 + 1093 + ret = iotlb_translate(vrh, (u64)(uintptr_t)src, 1094 + len, iov, 16, VHOST_MAP_RO); 1095 + if (ret < 0) 1096 + return ret; 1097 + 1098 + iov_iter_bvec(&iter, READ, iov, ret, len); 1099 + 1100 + ret = copy_from_iter(dst, len, &iter); 1101 + 1102 + return ret; 1103 + } 1104 
+ 1105 + static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, 1106 + void *src, size_t len) 1107 + { 1108 + struct iov_iter iter; 1109 + struct bio_vec iov[16]; 1110 + int ret; 1111 + 1112 + ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, 1113 + len, iov, 16, VHOST_MAP_WO); 1114 + if (ret < 0) 1115 + return ret; 1116 + 1117 + iov_iter_bvec(&iter, WRITE, iov, ret, len); 1118 + 1119 + return copy_to_iter(src, len, &iter); 1120 + } 1121 + 1122 + static inline int getu16_iotlb(const struct vringh *vrh, 1123 + u16 *val, const __virtio16 *p) 1124 + { 1125 + struct bio_vec iov; 1126 + void *kaddr, *from; 1127 + int ret; 1128 + 1129 + /* Atomic read is needed for getu16 */ 1130 + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1131 + &iov, 1, VHOST_MAP_RO); 1132 + if (ret < 0) 1133 + return ret; 1134 + 1135 + kaddr = kmap_atomic(iov.bv_page); 1136 + from = kaddr + iov.bv_offset; 1137 + *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from)); 1138 + kunmap_atomic(kaddr); 1139 + 1140 + return 0; 1141 + } 1142 + 1143 + static inline int putu16_iotlb(const struct vringh *vrh, 1144 + __virtio16 *p, u16 val) 1145 + { 1146 + struct bio_vec iov; 1147 + void *kaddr, *to; 1148 + int ret; 1149 + 1150 + /* Atomic write is needed for putu16 */ 1151 + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1152 + &iov, 1, VHOST_MAP_WO); 1153 + if (ret < 0) 1154 + return ret; 1155 + 1156 + kaddr = kmap_atomic(iov.bv_page); 1157 + to = kaddr + iov.bv_offset; 1158 + WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val)); 1159 + kunmap_atomic(kaddr); 1160 + 1161 + return 0; 1162 + } 1163 + 1164 + static inline int copydesc_iotlb(const struct vringh *vrh, 1165 + void *dst, const void *src, size_t len) 1166 + { 1167 + int ret; 1168 + 1169 + ret = copy_from_iotlb(vrh, dst, (void *)src, len); 1170 + if (ret != len) 1171 + return -EFAULT; 1172 + 1173 + return 0; 1174 + } 1175 + 1176 + static inline int xfer_from_iotlb(const struct vringh *vrh, void *src, 1177 + 
void *dst, size_t len) 1178 + { 1179 + int ret; 1180 + 1181 + ret = copy_from_iotlb(vrh, dst, src, len); 1182 + if (ret != len) 1183 + return -EFAULT; 1184 + 1185 + return 0; 1186 + } 1187 + 1188 + static inline int xfer_to_iotlb(const struct vringh *vrh, 1189 + void *dst, void *src, size_t len) 1190 + { 1191 + int ret; 1192 + 1193 + ret = copy_to_iotlb(vrh, dst, src, len); 1194 + if (ret != len) 1195 + return -EFAULT; 1196 + 1197 + return 0; 1198 + } 1199 + 1200 + static inline int putused_iotlb(const struct vringh *vrh, 1201 + struct vring_used_elem *dst, 1202 + const struct vring_used_elem *src, 1203 + unsigned int num) 1204 + { 1205 + int size = num * sizeof(*dst); 1206 + int ret; 1207 + 1208 + ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst)); 1209 + if (ret != size) 1210 + return -EFAULT; 1211 + 1212 + return 0; 1213 + } 1214 + 1215 + /** 1216 + * vringh_init_iotlb - initialize a vringh for a ring with IOTLB. 1217 + * @vrh: the vringh to initialize. 1218 + * @features: the feature bits for this ring. 1219 + * @num: the number of elements. 1220 + * @weak_barriers: true if we only need memory barriers, not I/O. 1221 + * @desc: the userpace descriptor pointer. 1222 + * @avail: the userpace avail pointer. 1223 + * @used: the userpace used pointer. 1224 + * 1225 + * Returns an error if num is invalid. 1226 + */ 1227 + int vringh_init_iotlb(struct vringh *vrh, u64 features, 1228 + unsigned int num, bool weak_barriers, 1229 + struct vring_desc *desc, 1230 + struct vring_avail *avail, 1231 + struct vring_used *used) 1232 + { 1233 + return vringh_init_kern(vrh, features, num, weak_barriers, 1234 + desc, avail, used); 1235 + } 1236 + EXPORT_SYMBOL(vringh_init_iotlb); 1237 + 1238 + /** 1239 + * vringh_set_iotlb - initialize a vringh for a ring with IOTLB. 
1240 + * @vrh: the vring 1241 + * @iotlb: iotlb associated with this vring 1242 + */ 1243 + void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb) 1244 + { 1245 + vrh->iotlb = iotlb; 1246 + } 1247 + EXPORT_SYMBOL(vringh_set_iotlb); 1248 + 1249 + /** 1250 + * vringh_getdesc_iotlb - get next available descriptor from ring with 1251 + * IOTLB. 1252 + * @vrh: the kernelspace vring. 1253 + * @riov: where to put the readable descriptors (or NULL) 1254 + * @wiov: where to put the writable descriptors (or NULL) 1255 + * @head: head index we received, for passing to vringh_complete_iotlb(). 1256 + * @gfp: flags for allocating larger riov/wiov. 1257 + * 1258 + * Returns 0 if there was no descriptor, 1 if there was, or -errno. 1259 + * 1260 + * Note that on error return, you can tell the difference between an 1261 + * invalid ring and a single invalid descriptor: in the former case, 1262 + * *head will be vrh->vring.num. You may be able to ignore an invalid 1263 + * descriptor, but there's not much you can do with an invalid ring. 1264 + * 1265 + * Note that you may need to clean up riov and wiov, even on error! 1266 + */ 1267 + int vringh_getdesc_iotlb(struct vringh *vrh, 1268 + struct vringh_kiov *riov, 1269 + struct vringh_kiov *wiov, 1270 + u16 *head, 1271 + gfp_t gfp) 1272 + { 1273 + int err; 1274 + 1275 + err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx); 1276 + if (err < 0) 1277 + return err; 1278 + 1279 + /* Empty... */ 1280 + if (err == vrh->vring.num) 1281 + return 0; 1282 + 1283 + *head = err; 1284 + err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, 1285 + gfp, copydesc_iotlb); 1286 + if (err) 1287 + return err; 1288 + 1289 + return 1; 1290 + } 1291 + EXPORT_SYMBOL(vringh_getdesc_iotlb); 1292 + 1293 + /** 1294 + * vringh_iov_pull_iotlb - copy bytes from vring_iov. 1295 + * @vrh: the vring. 1296 + * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume) 1297 + * @dst: the place to copy. 
1298 + * @len: the maximum length to copy. 1299 + * 1300 + * Returns the bytes copied <= len or a negative errno. 1301 + */ 1302 + ssize_t vringh_iov_pull_iotlb(struct vringh *vrh, 1303 + struct vringh_kiov *riov, 1304 + void *dst, size_t len) 1305 + { 1306 + return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb); 1307 + } 1308 + EXPORT_SYMBOL(vringh_iov_pull_iotlb); 1309 + 1310 + /** 1311 + * vringh_iov_push_iotlb - copy bytes into vring_iov. 1312 + * @vrh: the vring. 1313 + * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume) 1314 + * @dst: the place to copy. 1315 + * @len: the maximum length to copy. 1316 + * 1317 + * Returns the bytes copied <= len or a negative errno. 1318 + */ 1319 + ssize_t vringh_iov_push_iotlb(struct vringh *vrh, 1320 + struct vringh_kiov *wiov, 1321 + const void *src, size_t len) 1322 + { 1323 + return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb); 1324 + } 1325 + EXPORT_SYMBOL(vringh_iov_push_iotlb); 1326 + 1327 + /** 1328 + * vringh_abandon_iotlb - we've decided not to handle the descriptor(s). 1329 + * @vrh: the vring. 1330 + * @num: the number of descriptors to put back (ie. num 1331 + * vringh_get_iotlb() to undo). 1332 + * 1333 + * The next vringh_get_iotlb() will return the old descriptor(s) again. 1334 + */ 1335 + void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num) 1336 + { 1337 + /* We only update vring_avail_event(vr) when we want to be notified, 1338 + * so we haven't changed that yet. 1339 + */ 1340 + vrh->last_avail_idx -= num; 1341 + } 1342 + EXPORT_SYMBOL(vringh_abandon_iotlb); 1343 + 1344 + /** 1345 + * vringh_complete_iotlb - we've finished with descriptor, publish it. 1346 + * @vrh: the vring. 1347 + * @head: the head as filled in by vringh_getdesc_iotlb. 1348 + * @len: the length of data we have written. 1349 + * 1350 + * You should check vringh_need_notify_iotlb() after one or more calls 1351 + * to this function. 
1352 + */ 1353 + int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len) 1354 + { 1355 + struct vring_used_elem used; 1356 + 1357 + used.id = cpu_to_vringh32(vrh, head); 1358 + used.len = cpu_to_vringh32(vrh, len); 1359 + 1360 + return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb); 1361 + } 1362 + EXPORT_SYMBOL(vringh_complete_iotlb); 1363 + 1364 + /** 1365 + * vringh_notify_enable_iotlb - we want to know if something changes. 1366 + * @vrh: the vring. 1367 + * 1368 + * This always enables notifications, but returns false if there are 1369 + * now more buffers available in the vring. 1370 + */ 1371 + bool vringh_notify_enable_iotlb(struct vringh *vrh) 1372 + { 1373 + return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb); 1374 + } 1375 + EXPORT_SYMBOL(vringh_notify_enable_iotlb); 1376 + 1377 + /** 1378 + * vringh_notify_disable_iotlb - don't tell us if something changes. 1379 + * @vrh: the vring. 1380 + * 1381 + * This is our normal running state: we disable and then only enable when 1382 + * we're going to sleep. 1383 + */ 1384 + void vringh_notify_disable_iotlb(struct vringh *vrh) 1385 + { 1386 + __vringh_notify_disable(vrh, putu16_iotlb); 1387 + } 1388 + EXPORT_SYMBOL(vringh_notify_disable_iotlb); 1389 + 1390 + /** 1391 + * vringh_need_notify_iotlb - must we tell the other side about used buffers? 1392 + * @vrh: the vring we've called vringh_complete_iotlb() on. 1393 + * 1394 + * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. 1395 + */ 1396 + int vringh_need_notify_iotlb(struct vringh *vrh) 1397 + { 1398 + return __vringh_need_notify(vrh, getu16_iotlb); 1399 + } 1400 + EXPORT_SYMBOL(vringh_need_notify_iotlb); 1401 + 1061 1402 1062 1403 MODULE_LICENSE("GPL");
+1 -1
drivers/vhost/vsock.c
··· 621 621 622 622 vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), 623 623 UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, 624 - VHOST_VSOCK_WEIGHT); 624 + VHOST_VSOCK_WEIGHT, NULL); 625 625 626 626 file->private_data = vsock; 627 627 spin_lock_init(&vsock->send_pkt_list_lock);
+13
drivers/virtio/Kconfig
··· 43 43 44 44 If unsure, say Y. 45 45 46 + config VIRTIO_VDPA 47 + tristate "vDPA driver for virtio devices" 48 + select VDPA 49 + select VIRTIO 50 + help 51 + This driver provides support for virtio based paravirtual 52 + device driver over vDPA bus. For this to be useful, you need 53 + an appropriate vDPA device implementation that operates on a 54 + physical device to allow the datapath of virtio to be 55 + offloaded to hardware. 56 + 57 + If unsure, say M. 58 + 46 59 config VIRTIO_PMEM 47 60 tristate "Support for virtio pmem driver" 48 61 depends on VIRTIO
+1
drivers/virtio/Makefile
··· 6 6 virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o 7 7 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o 8 8 obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o 9 + obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
+396
drivers/virtio/virtio_vdpa.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * VIRTIO based driver for vDPA device 4 + * 5 + * Copyright (c) 2020, Red Hat. All rights reserved. 6 + * Author: Jason Wang <jasowang@redhat.com> 7 + * 8 + */ 9 + 10 + #include <linux/init.h> 11 + #include <linux/module.h> 12 + #include <linux/device.h> 13 + #include <linux/kernel.h> 14 + #include <linux/slab.h> 15 + #include <linux/uuid.h> 16 + #include <linux/virtio.h> 17 + #include <linux/vdpa.h> 18 + #include <linux/virtio_config.h> 19 + #include <linux/virtio_ring.h> 20 + 21 + #define MOD_VERSION "0.1" 22 + #define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>" 23 + #define MOD_DESC "vDPA bus driver for virtio devices" 24 + #define MOD_LICENSE "GPL v2" 25 + 26 + struct virtio_vdpa_device { 27 + struct virtio_device vdev; 28 + struct vdpa_device *vdpa; 29 + u64 features; 30 + 31 + /* The lock to protect virtqueue list */ 32 + spinlock_t lock; 33 + /* List of virtio_vdpa_vq_info */ 34 + struct list_head virtqueues; 35 + }; 36 + 37 + struct virtio_vdpa_vq_info { 38 + /* the actual virtqueue */ 39 + struct virtqueue *vq; 40 + 41 + /* the list node for the virtqueues list */ 42 + struct list_head node; 43 + }; 44 + 45 + static inline struct virtio_vdpa_device * 46 + to_virtio_vdpa_device(struct virtio_device *dev) 47 + { 48 + return container_of(dev, struct virtio_vdpa_device, vdev); 49 + } 50 + 51 + static struct vdpa_device *vd_get_vdpa(struct virtio_device *vdev) 52 + { 53 + return to_virtio_vdpa_device(vdev)->vdpa; 54 + } 55 + 56 + static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, 57 + void *buf, unsigned len) 58 + { 59 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 60 + const struct vdpa_config_ops *ops = vdpa->config; 61 + 62 + ops->get_config(vdpa, offset, buf, len); 63 + } 64 + 65 + static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, 66 + const void *buf, unsigned len) 67 + { 68 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 69 + const struct 
vdpa_config_ops *ops = vdpa->config; 70 + 71 + ops->set_config(vdpa, offset, buf, len); 72 + } 73 + 74 + static u32 virtio_vdpa_generation(struct virtio_device *vdev) 75 + { 76 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 77 + const struct vdpa_config_ops *ops = vdpa->config; 78 + 79 + if (ops->get_generation) 80 + return ops->get_generation(vdpa); 81 + 82 + return 0; 83 + } 84 + 85 + static u8 virtio_vdpa_get_status(struct virtio_device *vdev) 86 + { 87 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 88 + const struct vdpa_config_ops *ops = vdpa->config; 89 + 90 + return ops->get_status(vdpa); 91 + } 92 + 93 + static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status) 94 + { 95 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 96 + const struct vdpa_config_ops *ops = vdpa->config; 97 + 98 + return ops->set_status(vdpa, status); 99 + } 100 + 101 + static void virtio_vdpa_reset(struct virtio_device *vdev) 102 + { 103 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 104 + const struct vdpa_config_ops *ops = vdpa->config; 105 + 106 + return ops->set_status(vdpa, 0); 107 + } 108 + 109 + static bool virtio_vdpa_notify(struct virtqueue *vq) 110 + { 111 + struct vdpa_device *vdpa = vd_get_vdpa(vq->vdev); 112 + const struct vdpa_config_ops *ops = vdpa->config; 113 + 114 + ops->kick_vq(vdpa, vq->index); 115 + 116 + return true; 117 + } 118 + 119 + static irqreturn_t virtio_vdpa_config_cb(void *private) 120 + { 121 + struct virtio_vdpa_device *vd_dev = private; 122 + 123 + virtio_config_changed(&vd_dev->vdev); 124 + 125 + return IRQ_HANDLED; 126 + } 127 + 128 + static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) 129 + { 130 + struct virtio_vdpa_vq_info *info = private; 131 + 132 + return vring_interrupt(0, info->vq); 133 + } 134 + 135 + static struct virtqueue * 136 + virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, 137 + void (*callback)(struct virtqueue *vq), 138 + const char *name, bool ctx) 139 + { 140 + struct virtio_vdpa_device 
*vd_dev = to_virtio_vdpa_device(vdev); 141 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 142 + const struct vdpa_config_ops *ops = vdpa->config; 143 + struct virtio_vdpa_vq_info *info; 144 + struct vdpa_callback cb; 145 + struct virtqueue *vq; 146 + u64 desc_addr, driver_addr, device_addr; 147 + unsigned long flags; 148 + u32 align, num; 149 + int err; 150 + 151 + if (!name) 152 + return NULL; 153 + 154 + /* Queue shouldn't already be set up. */ 155 + if (ops->get_vq_ready(vdpa, index)) 156 + return ERR_PTR(-ENOENT); 157 + 158 + /* Allocate and fill out our active queue description */ 159 + info = kmalloc(sizeof(*info), GFP_KERNEL); 160 + if (!info) 161 + return ERR_PTR(-ENOMEM); 162 + 163 + num = ops->get_vq_num_max(vdpa); 164 + if (num == 0) { 165 + err = -ENOENT; 166 + goto error_new_virtqueue; 167 + } 168 + 169 + /* Create the vring */ 170 + align = ops->get_vq_align(vdpa); 171 + vq = vring_create_virtqueue(index, num, align, vdev, 172 + true, true, ctx, 173 + virtio_vdpa_notify, callback, name); 174 + if (!vq) { 175 + err = -ENOMEM; 176 + goto error_new_virtqueue; 177 + } 178 + 179 + /* Setup virtqueue callback */ 180 + cb.callback = virtio_vdpa_virtqueue_cb; 181 + cb.private = info; 182 + ops->set_vq_cb(vdpa, index, &cb); 183 + ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); 184 + 185 + desc_addr = virtqueue_get_desc_addr(vq); 186 + driver_addr = virtqueue_get_avail_addr(vq); 187 + device_addr = virtqueue_get_used_addr(vq); 188 + 189 + if (ops->set_vq_address(vdpa, index, 190 + desc_addr, driver_addr, 191 + device_addr)) { 192 + err = -EINVAL; 193 + goto err_vq; 194 + } 195 + 196 + ops->set_vq_ready(vdpa, index, 1); 197 + 198 + vq->priv = info; 199 + info->vq = vq; 200 + 201 + spin_lock_irqsave(&vd_dev->lock, flags); 202 + list_add(&info->node, &vd_dev->virtqueues); 203 + spin_unlock_irqrestore(&vd_dev->lock, flags); 204 + 205 + return vq; 206 + 207 + err_vq: 208 + vring_del_virtqueue(vq); 209 + error_new_virtqueue: 210 + ops->set_vq_ready(vdpa, 
index, 0); 211 + /* VDPA driver should make sure vq is stopeed here */ 212 + WARN_ON(ops->get_vq_ready(vdpa, index)); 213 + kfree(info); 214 + return ERR_PTR(err); 215 + } 216 + 217 + static void virtio_vdpa_del_vq(struct virtqueue *vq) 218 + { 219 + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev); 220 + struct vdpa_device *vdpa = vd_dev->vdpa; 221 + const struct vdpa_config_ops *ops = vdpa->config; 222 + struct virtio_vdpa_vq_info *info = vq->priv; 223 + unsigned int index = vq->index; 224 + unsigned long flags; 225 + 226 + spin_lock_irqsave(&vd_dev->lock, flags); 227 + list_del(&info->node); 228 + spin_unlock_irqrestore(&vd_dev->lock, flags); 229 + 230 + /* Select and deactivate the queue */ 231 + ops->set_vq_ready(vdpa, index, 0); 232 + WARN_ON(ops->get_vq_ready(vdpa, index)); 233 + 234 + vring_del_virtqueue(vq); 235 + 236 + kfree(info); 237 + } 238 + 239 + static void virtio_vdpa_del_vqs(struct virtio_device *vdev) 240 + { 241 + struct virtqueue *vq, *n; 242 + 243 + list_for_each_entry_safe(vq, n, &vdev->vqs, list) 244 + virtio_vdpa_del_vq(vq); 245 + } 246 + 247 + static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs, 248 + struct virtqueue *vqs[], 249 + vq_callback_t *callbacks[], 250 + const char * const names[], 251 + const bool *ctx, 252 + struct irq_affinity *desc) 253 + { 254 + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); 255 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 256 + const struct vdpa_config_ops *ops = vdpa->config; 257 + struct vdpa_callback cb; 258 + int i, err, queue_idx = 0; 259 + 260 + for (i = 0; i < nvqs; ++i) { 261 + if (!names[i]) { 262 + vqs[i] = NULL; 263 + continue; 264 + } 265 + 266 + vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, 267 + callbacks[i], names[i], ctx ? 
268 + ctx[i] : false); 269 + if (IS_ERR(vqs[i])) { 270 + err = PTR_ERR(vqs[i]); 271 + goto err_setup_vq; 272 + } 273 + } 274 + 275 + cb.callback = virtio_vdpa_config_cb; 276 + cb.private = vd_dev; 277 + ops->set_config_cb(vdpa, &cb); 278 + 279 + return 0; 280 + 281 + err_setup_vq: 282 + virtio_vdpa_del_vqs(vdev); 283 + return err; 284 + } 285 + 286 + static u64 virtio_vdpa_get_features(struct virtio_device *vdev) 287 + { 288 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 289 + const struct vdpa_config_ops *ops = vdpa->config; 290 + 291 + return ops->get_features(vdpa); 292 + } 293 + 294 + static int virtio_vdpa_finalize_features(struct virtio_device *vdev) 295 + { 296 + struct vdpa_device *vdpa = vd_get_vdpa(vdev); 297 + const struct vdpa_config_ops *ops = vdpa->config; 298 + 299 + /* Give virtio_ring a chance to accept features. */ 300 + vring_transport_features(vdev); 301 + 302 + return ops->set_features(vdpa, vdev->features); 303 + } 304 + 305 + static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) 306 + { 307 + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); 308 + struct vdpa_device *vdpa = vd_dev->vdpa; 309 + 310 + return dev_name(&vdpa->dev); 311 + } 312 + 313 + static const struct virtio_config_ops virtio_vdpa_config_ops = { 314 + .get = virtio_vdpa_get, 315 + .set = virtio_vdpa_set, 316 + .generation = virtio_vdpa_generation, 317 + .get_status = virtio_vdpa_get_status, 318 + .set_status = virtio_vdpa_set_status, 319 + .reset = virtio_vdpa_reset, 320 + .find_vqs = virtio_vdpa_find_vqs, 321 + .del_vqs = virtio_vdpa_del_vqs, 322 + .get_features = virtio_vdpa_get_features, 323 + .finalize_features = virtio_vdpa_finalize_features, 324 + .bus_name = virtio_vdpa_bus_name, 325 + }; 326 + 327 + static void virtio_vdpa_release_dev(struct device *_d) 328 + { 329 + struct virtio_device *vdev = 330 + container_of(_d, struct virtio_device, dev); 331 + struct virtio_vdpa_device *vd_dev = 332 + container_of(vdev, struct virtio_vdpa_device, 
vdev); 333 + 334 + kfree(vd_dev); 335 + } 336 + 337 + static int virtio_vdpa_probe(struct vdpa_device *vdpa) 338 + { 339 + const struct vdpa_config_ops *ops = vdpa->config; 340 + struct virtio_vdpa_device *vd_dev, *reg_dev = NULL; 341 + int ret = -EINVAL; 342 + 343 + vd_dev = kzalloc(sizeof(*vd_dev), GFP_KERNEL); 344 + if (!vd_dev) 345 + return -ENOMEM; 346 + 347 + vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa); 348 + vd_dev->vdev.dev.release = virtio_vdpa_release_dev; 349 + vd_dev->vdev.config = &virtio_vdpa_config_ops; 350 + vd_dev->vdpa = vdpa; 351 + INIT_LIST_HEAD(&vd_dev->virtqueues); 352 + spin_lock_init(&vd_dev->lock); 353 + 354 + vd_dev->vdev.id.device = ops->get_device_id(vdpa); 355 + if (vd_dev->vdev.id.device == 0) 356 + goto err; 357 + 358 + vd_dev->vdev.id.vendor = ops->get_vendor_id(vdpa); 359 + ret = register_virtio_device(&vd_dev->vdev); 360 + reg_dev = vd_dev; 361 + if (ret) 362 + goto err; 363 + 364 + vdpa_set_drvdata(vdpa, vd_dev); 365 + 366 + return 0; 367 + 368 + err: 369 + if (reg_dev) 370 + put_device(&vd_dev->vdev.dev); 371 + else 372 + kfree(vd_dev); 373 + return ret; 374 + } 375 + 376 + static void virtio_vdpa_remove(struct vdpa_device *vdpa) 377 + { 378 + struct virtio_vdpa_device *vd_dev = vdpa_get_drvdata(vdpa); 379 + 380 + unregister_virtio_device(&vd_dev->vdev); 381 + } 382 + 383 + static struct vdpa_driver virtio_vdpa_driver = { 384 + .driver = { 385 + .name = "virtio_vdpa", 386 + }, 387 + .probe = virtio_vdpa_probe, 388 + .remove = virtio_vdpa_remove, 389 + }; 390 + 391 + module_vdpa_driver(virtio_vdpa_driver); 392 + 393 + MODULE_VERSION(MOD_VERSION); 394 + MODULE_LICENSE(MOD_LICENSE); 395 + MODULE_AUTHOR(MOD_AUTHOR); 396 + MODULE_DESCRIPTION(MOD_DESC);
+253
include/linux/vdpa.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_VDPA_H 3 + #define _LINUX_VDPA_H 4 + 5 + #include <linux/kernel.h> 6 + #include <linux/device.h> 7 + #include <linux/interrupt.h> 8 + #include <linux/vhost_iotlb.h> 9 + 10 + /** 11 + * vDPA callback definition. 12 + * @callback: interrupt callback function 13 + * @private: the data passed to the callback function 14 + */ 15 + struct vdpa_callback { 16 + irqreturn_t (*callback)(void *data); 17 + void *private; 18 + }; 19 + 20 + /** 21 + * vDPA device - representation of a vDPA device 22 + * @dev: underlying device 23 + * @dma_dev: the actual device that is performing DMA 24 + * @config: the configuration ops for this device. 25 + * @index: device index 26 + */ 27 + struct vdpa_device { 28 + struct device dev; 29 + struct device *dma_dev; 30 + const struct vdpa_config_ops *config; 31 + unsigned int index; 32 + }; 33 + 34 + /** 35 + * vDPA_config_ops - operations for configuring a vDPA device. 36 + * Note: vDPA device drivers are required to implement all of the 37 + * operations unless it is mentioned to be optional in the following 38 + * list. 
39 + * 40 + * @set_vq_address: Set the address of virtqueue 41 + * @vdev: vdpa device 42 + * @idx: virtqueue index 43 + * @desc_area: address of desc area 44 + * @driver_area: address of driver area 45 + * @device_area: address of device area 46 + * Returns integer: success (0) or error (< 0) 47 + * @set_vq_num: Set the size of virtqueue 48 + * @vdev: vdpa device 49 + * @idx: virtqueue index 50 + * @num: the size of virtqueue 51 + * @kick_vq: Kick the virtqueue 52 + * @vdev: vdpa device 53 + * @idx: virtqueue index 54 + * @set_vq_cb: Set the interrupt callback function for 55 + * a virtqueue 56 + * @vdev: vdpa device 57 + * @idx: virtqueue index 58 + * @cb: virtio-vdev interrupt callback structure 59 + * @set_vq_ready: Set ready status for a virtqueue 60 + * @vdev: vdpa device 61 + * @idx: virtqueue index 62 + * @ready: ready (true) not ready(false) 63 + * @get_vq_ready: Get ready status for a virtqueue 64 + * @vdev: vdpa device 65 + * @idx: virtqueue index 66 + * Returns boolean: ready (true) or not (false) 67 + * @set_vq_state: Set the state for a virtqueue 68 + * @vdev: vdpa device 69 + * @idx: virtqueue index 70 + * @state: virtqueue state (last_avail_idx) 71 + * Returns integer: success (0) or error (< 0) 72 + * @get_vq_state: Get the state for a virtqueue 73 + * @vdev: vdpa device 74 + * @idx: virtqueue index 75 + * Returns virtqueue state (last_avail_idx) 76 + * @get_vq_align: Get the virtqueue align requirement 77 + * for the device 78 + * @vdev: vdpa device 79 + * Returns virtqueue algin requirement 80 + * @get_features: Get virtio features supported by the device 81 + * @vdev: vdpa device 82 + * Returns the virtio features support by the 83 + * device 84 + * @set_features: Set virtio features supported by the driver 85 + * @vdev: vdpa device 86 + * @features: feature support by the driver 87 + * Returns integer: success (0) or error (< 0) 88 + * @set_config_cb: Set the config interrupt callback 89 + * @vdev: vdpa device 90 + * @cb: virtio-vdev interrupt 
callback structure 91 + * @get_vq_num_max: Get the max size of virtqueue 92 + * @vdev: vdpa device 93 + * Returns u16: max size of virtqueue 94 + * @get_device_id: Get virtio device id 95 + * @vdev: vdpa device 96 + * Returns u32: virtio device id 97 + * @get_vendor_id: Get id for the vendor that provides this device 98 + * @vdev: vdpa device 99 + * Returns u32: virtio vendor id 100 + * @get_status: Get the device status 101 + * @vdev: vdpa device 102 + * Returns u8: virtio device status 103 + * @set_status: Set the device status 104 + * @vdev: vdpa device 105 + * @status: virtio device status 106 + * @get_config: Read from device specific configuration space 107 + * @vdev: vdpa device 108 + * @offset: offset from the beginning of 109 + * configuration space 110 + * @buf: buffer used to read to 111 + * @len: the length to read from 112 + * configuration space 113 + * @set_config: Write to device specific configuration space 114 + * @vdev: vdpa device 115 + * @offset: offset from the beginning of 116 + * configuration space 117 + * @buf: buffer used to write from 118 + * @len: the length to write to 119 + * configuration space 120 + * @get_generation: Get device config generation (optional) 121 + * @vdev: vdpa device 122 + * Returns u32: device generation 123 + * @set_map: Set device memory mapping (optional) 124 + * Needed for device that using device 125 + * specific DMA translation (on-chip IOMMU) 126 + * @vdev: vdpa device 127 + * @iotlb: vhost memory mapping to be 128 + * used by the vDPA 129 + * Returns integer: success (0) or error (< 0) 130 + * @dma_map: Map an area of PA to IOVA (optional) 131 + * Needed for device that using device 132 + * specific DMA translation (on-chip IOMMU) 133 + * and preferring incremental map. 
134 + * @vdev: vdpa device 135 + * @iova: iova to be mapped 136 + * @size: size of the area 137 + * @pa: physical address for the map 138 + * @perm: device access permission (VHOST_MAP_XX) 139 + * Returns integer: success (0) or error (< 0) 140 + * @dma_unmap: Unmap an area of IOVA (optional but 141 + * must be implemented with dma_map) 142 + * Needed for devices that use device-specific 143 + * DMA translation (on-chip IOMMU) 144 + * and preferring incremental unmap. 145 + * @vdev: vdpa device 146 + * @iova: iova to be unmapped 147 + * @size: size of the area 148 + * Returns integer: success (0) or error (< 0) 149 + * @free: Free resources that belong to vDPA (optional) 150 + * @vdev: vdpa device 151 + */ 152 + struct vdpa_config_ops { 153 + /* Virtqueue ops */ 154 + int (*set_vq_address)(struct vdpa_device *vdev, 155 + u16 idx, u64 desc_area, u64 driver_area, 156 + u64 device_area); 157 + void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num); 158 + void (*kick_vq)(struct vdpa_device *vdev, u16 idx); 159 + void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx, 160 + struct vdpa_callback *cb); 161 + void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready); 162 + bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); 163 + int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state); 164 + u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); 165 + 166 + /* Device ops */ 167 + u16 (*get_vq_align)(struct vdpa_device *vdev); 168 + u64 (*get_features)(struct vdpa_device *vdev); 169 + int (*set_features)(struct vdpa_device *vdev, u64 features); 170 + void (*set_config_cb)(struct vdpa_device *vdev, 171 + struct vdpa_callback *cb); 172 + u16 (*get_vq_num_max)(struct vdpa_device *vdev); 173 + u32 (*get_device_id)(struct vdpa_device *vdev); 174 + u32 (*get_vendor_id)(struct vdpa_device *vdev); 175 + u8 (*get_status)(struct vdpa_device *vdev); 176 + void (*set_status)(struct vdpa_device *vdev, u8 status); 177 + void (*get_config)(struct 
vdpa_device *vdev, unsigned int offset, 178 + void *buf, unsigned int len); 179 + void (*set_config)(struct vdpa_device *vdev, unsigned int offset, 180 + const void *buf, unsigned int len); 181 + u32 (*get_generation)(struct vdpa_device *vdev); 182 + 183 + /* DMA ops */ 184 + int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); 185 + int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, 186 + u64 pa, u32 perm); 187 + int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); 188 + 189 + /* Free device resources */ 190 + void (*free)(struct vdpa_device *vdev); 191 + }; 192 + 193 + struct vdpa_device *__vdpa_alloc_device(struct device *parent, 194 + const struct vdpa_config_ops *config, 195 + size_t size); 196 + 197 + #define vdpa_alloc_device(dev_struct, member, parent, config) \ 198 + container_of(__vdpa_alloc_device( \ 199 + parent, config, \ 200 + sizeof(dev_struct) + \ 201 + BUILD_BUG_ON_ZERO(offsetof( \ 202 + dev_struct, member))), \ 203 + dev_struct, member) 204 + 205 + int vdpa_register_device(struct vdpa_device *vdev); 206 + void vdpa_unregister_device(struct vdpa_device *vdev); 207 + 208 + /** 209 + * vdpa_driver - operations for a vDPA driver 210 + * @driver: underlying device driver 211 + * @probe: the function to call when a device is found. Returns 0 or -errno. 212 + * @remove: the function to call when a device is removed. 
213 + */ 214 + struct vdpa_driver { 215 + struct device_driver driver; 216 + int (*probe)(struct vdpa_device *vdev); 217 + void (*remove)(struct vdpa_device *vdev); 218 + }; 219 + 220 + #define vdpa_register_driver(drv) \ 221 + __vdpa_register_driver(drv, THIS_MODULE) 222 + int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner); 223 + void vdpa_unregister_driver(struct vdpa_driver *drv); 224 + 225 + #define module_vdpa_driver(__vdpa_driver) \ 226 + module_driver(__vdpa_driver, vdpa_register_driver, \ 227 + vdpa_unregister_driver) 228 + 229 + static inline struct vdpa_driver *drv_to_vdpa(struct device_driver *driver) 230 + { 231 + return container_of(driver, struct vdpa_driver, driver); 232 + } 233 + 234 + static inline struct vdpa_device *dev_to_vdpa(struct device *_dev) 235 + { 236 + return container_of(_dev, struct vdpa_device, dev); 237 + } 238 + 239 + static inline void *vdpa_get_drvdata(const struct vdpa_device *vdev) 240 + { 241 + return dev_get_drvdata(&vdev->dev); 242 + } 243 + 244 + static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) 245 + { 246 + dev_set_drvdata(&vdev->dev, data); 247 + } 248 + 249 + static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) 250 + { 251 + return vdev->dma_dev; 252 + } 253 + #endif /* _LINUX_VDPA_H */
+47
include/linux/vhost_iotlb.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_VHOST_IOTLB_H 3 + #define _LINUX_VHOST_IOTLB_H 4 + 5 + #include <linux/interval_tree_generic.h> 6 + 7 + struct vhost_iotlb_map { 8 + struct rb_node rb; 9 + struct list_head link; 10 + u64 start; 11 + u64 last; 12 + u64 size; 13 + u64 addr; 14 + #define VHOST_MAP_RO 0x1 15 + #define VHOST_MAP_WO 0x2 16 + #define VHOST_MAP_RW 0x3 17 + u32 perm; 18 + u32 flags_padding; 19 + u64 __subtree_last; 20 + }; 21 + 22 + #define VHOST_IOTLB_FLAG_RETIRE 0x1 23 + 24 + struct vhost_iotlb { 25 + struct rb_root_cached root; 26 + struct list_head list; 27 + unsigned int limit; 28 + unsigned int nmaps; 29 + unsigned int flags; 30 + }; 31 + 32 + int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last, 33 + u64 addr, unsigned int perm); 34 + void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); 35 + 36 + struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags); 37 + void vhost_iotlb_free(struct vhost_iotlb *iotlb); 38 + void vhost_iotlb_reset(struct vhost_iotlb *iotlb); 39 + 40 + struct vhost_iotlb_map * 41 + vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last); 42 + struct vhost_iotlb_map * 43 + vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last); 44 + 45 + void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, 46 + struct vhost_iotlb_map *map); 47 + #endif
+36
include/linux/vringh.h
··· 14 14 #include <linux/virtio_byteorder.h> 15 15 #include <linux/uio.h> 16 16 #include <linux/slab.h> 17 + #include <linux/dma-direction.h> 18 + #include <linux/vhost_iotlb.h> 17 19 #include <asm/barrier.h> 18 20 19 21 /* virtio_ring with information needed for host access. */ ··· 40 38 41 39 /* The vring (note: it may contain user pointers!) */ 42 40 struct vring vring; 41 + 42 + /* IOTLB for this vring */ 43 + struct vhost_iotlb *iotlb; 43 44 44 45 /* The function to call to notify the guest about added buffers */ 45 46 void (*notify)(struct vringh *); ··· 253 248 { 254 249 return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); 255 250 } 251 + 252 + void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); 253 + 254 + int vringh_init_iotlb(struct vringh *vrh, u64 features, 255 + unsigned int num, bool weak_barriers, 256 + struct vring_desc *desc, 257 + struct vring_avail *avail, 258 + struct vring_used *used); 259 + 260 + int vringh_getdesc_iotlb(struct vringh *vrh, 261 + struct vringh_kiov *riov, 262 + struct vringh_kiov *wiov, 263 + u16 *head, 264 + gfp_t gfp); 265 + 266 + ssize_t vringh_iov_pull_iotlb(struct vringh *vrh, 267 + struct vringh_kiov *riov, 268 + void *dst, size_t len); 269 + ssize_t vringh_iov_push_iotlb(struct vringh *vrh, 270 + struct vringh_kiov *wiov, 271 + const void *src, size_t len); 272 + 273 + void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); 274 + 275 + int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); 276 + 277 + bool vringh_notify_enable_iotlb(struct vringh *vrh); 278 + void vringh_notify_disable_iotlb(struct vringh *vrh); 279 + 280 + int vringh_need_notify_iotlb(struct vringh *vrh); 281 + 256 282 #endif /* _LINUX_VRINGH_H */
+24
include/uapi/linux/vhost.h
··· 116 116 #define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) 117 117 #define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) 118 118 119 + /* VHOST_VDPA specific defines */ 120 + 121 + /* Get the device id. The device ids follow the same definition of 122 + * the device id defined in virtio-spec. 123 + */ 124 + #define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32) 125 + /* Get and set the status. The status bits follow the same definition 126 + * of the device status defined in virtio-spec. 127 + */ 128 + #define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8) 129 + #define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) 130 + /* Get and set the device config. The device config follows the same 131 + * definition of the device config defined in virtio-spec. 132 + */ 133 + #define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \ 134 + struct vhost_vdpa_config) 135 + #define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \ 136 + struct vhost_vdpa_config) 137 + /* Enable/disable the ring. */ 138 + #define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ 139 + struct vhost_vring_state) 140 + /* Get the max ring size. */ 141 + #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) 142 + 119 143 #endif
+8
include/uapi/linux/vhost_types.h
··· 119 119 unsigned short reserved; 120 120 }; 121 121 122 + /* VHOST_VDPA specific definitions */ 123 + 124 + struct vhost_vdpa_config { 125 + __u32 off; 126 + __u32 len; 127 + __u8 buf[0]; 128 + }; 129 + 122 130 /* Feature bits */ 123 131 /* Log all write descriptors. Can be changed while device is active. */ 124 132 #define VHOST_F_LOG_ALL 26
+98 -4
include/uapi/linux/virtio_net.h
··· 57 57 * Steering */ 58 58 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ 59 59 60 + #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ 61 + #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ 62 + #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ 60 63 #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device 61 64 * with the same MAC. 62 65 */ ··· 71 68 72 69 #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ 73 70 #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ 71 + 72 + /* supported/enabled hash types */ 73 + #define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0) 74 + #define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1) 75 + #define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2) 76 + #define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3) 77 + #define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4) 78 + #define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5) 79 + #define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6) 80 + #define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7) 81 + #define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8) 74 82 75 83 struct virtio_net_config { 76 84 /* The config defining mac address (if VIRTIO_NET_F_MAC) */ ··· 106 92 * Any other value stands for unknown. 
107 93 */ 108 94 __u8 duplex; 95 + /* maximum size of RSS key */ 96 + __u8 rss_max_key_size; 97 + /* maximum number of indirection table entries */ 98 + __le16 rss_max_indirection_table_length; 99 + /* bitmask of supported VIRTIO_NET_RSS_HASH_ types */ 100 + __le32 supported_hash_types; 109 101 } __attribute__((packed)); 110 102 111 103 /* ··· 124 104 struct virtio_net_hdr_v1 { 125 105 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ 126 106 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ 107 + #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ 127 108 __u8 flags; 128 109 #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ 129 110 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ ··· 134 113 __u8 gso_type; 135 114 __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ 136 115 __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ 137 - __virtio16 csum_start; /* Position to start checksumming from */ 138 - __virtio16 csum_offset; /* Offset after that to place checksum */ 116 + union { 117 + struct { 118 + __virtio16 csum_start; 119 + __virtio16 csum_offset; 120 + }; 121 + /* Checksum calculation */ 122 + struct { 123 + /* Position to start checksumming from */ 124 + __virtio16 start; 125 + /* Offset after that to place checksum */ 126 + __virtio16 offset; 127 + } csum; 128 + /* Receive Segment Coalescing */ 129 + struct { 130 + /* Number of coalesced segments */ 131 + __le16 segments; 132 + /* Number of duplicated acks */ 133 + __le16 dup_acks; 134 + } rsc; 135 + }; 139 136 __virtio16 num_buffers; /* Number of merged rx buffers */ 137 + }; 138 + 139 + struct virtio_net_hdr_v1_hash { 140 + struct virtio_net_hdr_v1 hdr; 141 + __le32 hash_value; 142 + #define VIRTIO_NET_HASH_REPORT_NONE 0 143 + #define VIRTIO_NET_HASH_REPORT_IPv4 1 144 + #define VIRTIO_NET_HASH_REPORT_TCPv4 2 145 + #define VIRTIO_NET_HASH_REPORT_UDPv4 3 146 + #define VIRTIO_NET_HASH_REPORT_IPv6 4 147 + #define 
VIRTIO_NET_HASH_REPORT_TCPv6 5 148 + #define VIRTIO_NET_HASH_REPORT_UDPv6 6 149 + #define VIRTIO_NET_HASH_REPORT_IPv6_EX 7 150 + #define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8 151 + #define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9 152 + __le16 hash_report; 153 + __le16 padding; 140 154 }; 141 155 142 156 #ifndef VIRTIO_NET_NO_LEGACY ··· 284 228 285 229 /* 286 230 * Control Receive Flow Steering 287 - * 231 + */ 232 + #define VIRTIO_NET_CTRL_MQ 4 233 + /* 288 234 * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 289 235 * enables Receive Flow Steering, specifying the number of the transmit and 290 236 * receive queues that will be used. After the command is consumed and acked by ··· 299 241 __virtio16 virtqueue_pairs; 300 242 }; 301 243 302 - #define VIRTIO_NET_CTRL_MQ 4 303 244 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 304 245 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 305 246 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 247 + 248 + /* 249 + * The command VIRTIO_NET_CTRL_MQ_RSS_CONFIG has the same effect as 250 + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET does and additionally configures 251 + * the receive steering to use a hash calculated for incoming packet 252 + * to decide on receive virtqueue to place the packet. The command 253 + * also provides parameters to calculate a hash and receive virtqueue. 254 + */ 255 + struct virtio_net_rss_config { 256 + __le32 hash_types; 257 + __le16 indirection_table_mask; 258 + __le16 unclassified_queue; 259 + __le16 indirection_table[1/* + indirection_table_mask */]; 260 + __le16 max_tx_vq; 261 + __u8 hash_key_length; 262 + __u8 hash_key_data[/* hash_key_length */]; 263 + }; 264 + 265 + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 266 + 267 + /* 268 + * The command VIRTIO_NET_CTRL_MQ_HASH_CONFIG requests the device 269 + * to include in the virtio header of the packet the value of the 270 + * calculated hash and the report type of hash. It also provides 271 + * parameters for hash calculation. 
The command requires feature 272 + * VIRTIO_NET_F_HASH_REPORT to be negotiated to extend the 273 + * layout of virtio header as defined in virtio_net_hdr_v1_hash. 274 + */ 275 + struct virtio_net_hash_config { 276 + __le32 hash_types; 277 + /* for compatibility with virtio_net_rss_config */ 278 + __le16 reserved[4]; 279 + __u8 hash_key_length; 280 + __u8 hash_key_data[/* hash_key_length */]; 281 + }; 282 + 283 + #define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 306 284 307 285 /* 308 286 * Control network offloads
+26 -1
tools/virtio/Makefile
··· 8 8 vpath %.c ../../drivers/virtio ../../drivers/vhost 9 9 mod: 10 10 ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} 11 - .PHONY: all test mod clean 11 + 12 + #oot: build vhost as an out of tree module for a distro kernel 13 + #no effort is taken to make it actually build or work, but tends to mostly work 14 + #if the distro kernel is very close to upstream 15 + #unsupported! this is a development tool only, don't use the 16 + #resulting modules in production! 17 + OOT_KSRC=/lib/modules/$$(uname -r)/build 18 + OOT_VHOST=`pwd`/../../drivers/vhost 19 + #Everyone depends on vhost 20 + #Tweak the below to enable more modules 21 + OOT_CONFIGS=\ 22 + CONFIG_VHOST=m \ 23 + CONFIG_VHOST_NET=n \ 24 + CONFIG_VHOST_SCSI=n \ 25 + CONFIG_VHOST_VSOCK=n 26 + OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V} 27 + oot-build: 28 + echo "UNSUPPORTED! Don't use the resulting modules in production!" 29 + ${OOT_BUILD} M=`pwd`/vhost_test 30 + ${OOT_BUILD} M=${OOT_VHOST} ${OOT_CONFIGS} 31 + 32 + oot-clean: oot-build 33 + oot: oot-build 34 + oot-clean: OOT_BUILD+=clean 35 + 36 + .PHONY: all test mod clean vhost oot oot-clean oot-build 12 37 clean: 13 38 ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ 14 39 vhost_test/Module.symvers vhost_test/modules.order *.d