Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virt: acrn: Introduce ioeventfd

ioeventfd is a mechanism to register PIO/MMIO regions to trigger an
eventfd signal when written to by a User VM. ACRN userspace can register
any arbitrary I/O address with a corresponding eventfd and then pass the
eventfd to a specific end-point of interest for handling.

Vhost is a kernel-level virtio server which uses eventfd for signalling.
To support vhost on ACRN, ioeventfd is introduced in HSM.

A new I/O client dedicated to ioeventfd is associated with a User VM
during VM creation. HSM provides ioctls to associate an I/O region with
an eventfd. The I/O client signals an eventfd once its corresponding I/O
region is matched with an I/O request.

Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Yu Wang <yu1.wang@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Link: https://lore.kernel.org/r/20210207031040.49576-16-shuo.a.liu@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Shuo Liu and committed by
Greg Kroah-Hartman
d8ad5151 5a0c9f17

+327 -1
+1
drivers/virt/acrn/Kconfig
··· 2 2 config ACRN_HSM 3 3 tristate "ACRN Hypervisor Service Module" 4 4 depends on ACRN_GUEST 5 + select EVENTFD 5 6 help 6 7 ACRN Hypervisor Service Module (HSM) is a kernel module which 7 8 communicates with ACRN userspace through ioctls and talks to
+1 -1
drivers/virt/acrn/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_ACRN_HSM) := acrn.o 3 - acrn-y := hsm.o vm.o mm.o ioreq.o 3 + acrn-y := hsm.o vm.o mm.o ioreq.o ioeventfd.o
+10
drivers/virt/acrn/acrn_drv.h
··· 156 156 * @ioreq_page: The page of the I/O request shared buffer 157 157 * @pci_conf_addr: Address of a PCI configuration access emulation 158 158 * @monitor_page: Page of interrupt statistics of User VM 159 + * @ioeventfds_lock: Lock to protect ioeventfds list 160 + * @ioeventfds: List to link all hsm_ioeventfd 161 + * @ioeventfd_client: I/O client for ioeventfds of the VM 159 162 */ 160 163 struct acrn_vm { 161 164 struct list_head list; ··· 175 172 struct page *ioreq_page; 176 173 u32 pci_conf_addr; 177 174 struct page *monitor_page; 175 + struct mutex ioeventfds_lock; 176 + struct list_head ioeventfds; 177 + struct acrn_ioreq_client *ioeventfd_client; 178 178 }; 179 179 180 180 struct acrn_vm *acrn_vm_create(struct acrn_vm *vm, ··· 209 203 u32 type, u64 start, u64 end); 210 204 211 205 int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data); 206 + 207 + int acrn_ioeventfd_init(struct acrn_vm *vm); 208 + int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args); 209 + void acrn_ioeventfd_deinit(struct acrn_vm *vm); 212 210 213 211 #endif /* __ACRN_HSM_DRV_H */
+11
drivers/virt/acrn/hsm.c
··· 111 111 struct acrn_vcpu_regs *cpu_regs; 112 112 struct acrn_ioreq_notify notify; 113 113 struct acrn_ptdev_irq *irq_info; 114 + struct acrn_ioeventfd ioeventfd; 114 115 struct acrn_vm_memmap memmap; 115 116 struct acrn_msi_entry *msi; 116 117 struct acrn_pcidev *pcidev; ··· 336 335 return -EFAULT; 337 336 338 337 ret = pmcmd_ioctl(cstate_cmd, (void __user *)ioctl_param); 338 + break; 339 + case ACRN_IOCTL_IOEVENTFD: 340 + if (copy_from_user(&ioeventfd, (void __user *)ioctl_param, 341 + sizeof(ioeventfd))) 342 + return -EFAULT; 343 + 344 + if (ioeventfd.reserved != 0) 345 + return -EINVAL; 346 + 347 + ret = acrn_ioeventfd_config(vm, &ioeventfd); 339 348 break; 340 349 default: 341 350 dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd);
+273
drivers/virt/acrn/ioeventfd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * ACRN HSM eventfd - use eventfd objects to signal expected I/O requests 4 + * 5 + * Copyright (C) 2020 Intel Corporation. All rights reserved. 6 + * 7 + * Authors: 8 + * Shuo Liu <shuo.a.liu@intel.com> 9 + * Yakui Zhao <yakui.zhao@intel.com> 10 + */ 11 + 12 + #include <linux/eventfd.h> 13 + #include <linux/slab.h> 14 + 15 + #include "acrn_drv.h" 16 + 17 + /** 18 + * struct hsm_ioeventfd - Properties of HSM ioeventfd 19 + * @list: Entry within &acrn_vm.ioeventfds of ioeventfds of a VM 20 + * @eventfd: Eventfd of the HSM ioeventfd 21 + * @addr: Address of I/O range 22 + * @data: Data for matching 23 + * @length: Length of I/O range 24 + * @type: Type of I/O range (ACRN_IOREQ_TYPE_MMIO/ACRN_IOREQ_TYPE_PORTIO) 25 + * @wildcard: Data matching or not 26 + */ 27 + struct hsm_ioeventfd { 28 + struct list_head list; 29 + struct eventfd_ctx *eventfd; 30 + u64 addr; 31 + u64 data; 32 + int length; 33 + int type; 34 + bool wildcard; 35 + }; 36 + 37 + static inline int ioreq_type_from_flags(int flags) 38 + { 39 + return flags & ACRN_IOEVENTFD_FLAG_PIO ? 40 + ACRN_IOREQ_TYPE_PORTIO : ACRN_IOREQ_TYPE_MMIO; 41 + } 42 + 43 + static void acrn_ioeventfd_shutdown(struct acrn_vm *vm, struct hsm_ioeventfd *p) 44 + { 45 + lockdep_assert_held(&vm->ioeventfds_lock); 46 + 47 + eventfd_ctx_put(p->eventfd); 48 + list_del(&p->list); 49 + kfree(p); 50 + } 51 + 52 + static bool hsm_ioeventfd_is_conflict(struct acrn_vm *vm, 53 + struct hsm_ioeventfd *ioeventfd) 54 + { 55 + struct hsm_ioeventfd *p; 56 + 57 + lockdep_assert_held(&vm->ioeventfds_lock); 58 + 59 + /* Either one is wildcard, the data matching will be skipped. 
*/ 60 + list_for_each_entry(p, &vm->ioeventfds, list) 61 + if (p->eventfd == ioeventfd->eventfd && 62 + p->addr == ioeventfd->addr && 63 + p->type == ioeventfd->type && 64 + (p->wildcard || ioeventfd->wildcard || 65 + p->data == ioeventfd->data)) 66 + return true; 67 + 68 + return false; 69 + } 70 + 71 + /* 72 + * Assign an eventfd to a VM and create a HSM ioeventfd associated with the 73 + * eventfd. The properties of the HSM ioeventfd are built from a &struct 74 + * acrn_ioeventfd. 75 + */ 76 + static int acrn_ioeventfd_assign(struct acrn_vm *vm, 77 + struct acrn_ioeventfd *args) 78 + { 79 + struct eventfd_ctx *eventfd; 80 + struct hsm_ioeventfd *p; 81 + int ret; 82 + 83 + /* Check for range overflow */ 84 + if (args->addr + args->len < args->addr) 85 + return -EINVAL; 86 + 87 + /* 88 + * Currently, acrn_ioeventfd is used to support vhost. 1,2,4,8 width 89 + * accesses can cover vhost's requirements. 90 + */ 91 + if (!(args->len == 1 || args->len == 2 || 92 + args->len == 4 || args->len == 8)) 93 + return -EINVAL; 94 + 95 + eventfd = eventfd_ctx_fdget(args->fd); 96 + if (IS_ERR(eventfd)) 97 + return PTR_ERR(eventfd); 98 + 99 + p = kzalloc(sizeof(*p), GFP_KERNEL); 100 + if (!p) { 101 + ret = -ENOMEM; 102 + goto fail; 103 + } 104 + 105 + INIT_LIST_HEAD(&p->list); 106 + p->addr = args->addr; 107 + p->length = args->len; 108 + p->eventfd = eventfd; 109 + p->type = ioreq_type_from_flags(args->flags); 110 + 111 + /* 112 + * ACRN_IOEVENTFD_FLAG_DATAMATCH flag is set in virtio 1.0 support, the 113 + * writing of notification register of each virtqueue may trigger the 114 + * notification. There is no data matching requirement. 
115 + */ 116 + if (args->flags & ACRN_IOEVENTFD_FLAG_DATAMATCH) 117 + p->data = args->data; 118 + else 119 + p->wildcard = true; 120 + 121 + mutex_lock(&vm->ioeventfds_lock); 122 + 123 + if (hsm_ioeventfd_is_conflict(vm, p)) { 124 + ret = -EEXIST; 125 + goto unlock_fail; 126 + } 127 + 128 + /* register the I/O range into ioreq client */ 129 + ret = acrn_ioreq_range_add(vm->ioeventfd_client, p->type, 130 + p->addr, p->addr + p->length - 1); 131 + if (ret < 0) 132 + goto unlock_fail; 133 + 134 + list_add_tail(&p->list, &vm->ioeventfds); 135 + mutex_unlock(&vm->ioeventfds_lock); 136 + 137 + return 0; 138 + 139 + unlock_fail: 140 + mutex_unlock(&vm->ioeventfds_lock); 141 + kfree(p); 142 + fail: 143 + eventfd_ctx_put(eventfd); 144 + return ret; 145 + } 146 + 147 + static int acrn_ioeventfd_deassign(struct acrn_vm *vm, 148 + struct acrn_ioeventfd *args) 149 + { 150 + struct hsm_ioeventfd *p; 151 + struct eventfd_ctx *eventfd; 152 + 153 + eventfd = eventfd_ctx_fdget(args->fd); 154 + if (IS_ERR(eventfd)) 155 + return PTR_ERR(eventfd); 156 + 157 + mutex_lock(&vm->ioeventfds_lock); 158 + list_for_each_entry(p, &vm->ioeventfds, list) { 159 + if (p->eventfd != eventfd) 160 + continue; 161 + 162 + acrn_ioreq_range_del(vm->ioeventfd_client, p->type, 163 + p->addr, p->addr + p->length - 1); 164 + acrn_ioeventfd_shutdown(vm, p); 165 + break; 166 + } 167 + mutex_unlock(&vm->ioeventfds_lock); 168 + 169 + eventfd_ctx_put(eventfd); 170 + return 0; 171 + } 172 + 173 + static struct hsm_ioeventfd *hsm_ioeventfd_match(struct acrn_vm *vm, u64 addr, 174 + u64 data, int len, int type) 175 + { 176 + struct hsm_ioeventfd *p = NULL; 177 + 178 + lockdep_assert_held(&vm->ioeventfds_lock); 179 + 180 + list_for_each_entry(p, &vm->ioeventfds, list) { 181 + if (p->type == type && p->addr == addr && p->length >= len && 182 + (p->wildcard || p->data == data)) 183 + return p; 184 + } 185 + 186 + return NULL; 187 + } 188 + 189 + static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client, 190 + 
struct acrn_io_request *req) 191 + { 192 + struct hsm_ioeventfd *p; 193 + u64 addr, val; 194 + int size; 195 + 196 + if (req->type == ACRN_IOREQ_TYPE_MMIO) { 197 + /* 198 + * I/O requests are dispatched by range check only, so a 199 + * acrn_ioreq_client need process both READ and WRITE accesses 200 + * of same range. READ accesses are safe to be ignored here 201 + * because virtio PCI devices write the notify registers for 202 + * notification. 203 + */ 204 + if (req->reqs.mmio_request.direction == ACRN_IOREQ_DIR_READ) { 205 + /* reading does nothing and return 0 */ 206 + req->reqs.mmio_request.value = 0; 207 + return 0; 208 + } 209 + addr = req->reqs.mmio_request.address; 210 + size = req->reqs.mmio_request.size; 211 + val = req->reqs.mmio_request.value; 212 + } else { 213 + if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ) { 214 + /* reading does nothing and return 0 */ 215 + req->reqs.pio_request.value = 0; 216 + return 0; 217 + } 218 + addr = req->reqs.pio_request.address; 219 + size = req->reqs.pio_request.size; 220 + val = req->reqs.pio_request.value; 221 + } 222 + 223 + mutex_lock(&client->vm->ioeventfds_lock); 224 + p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type); 225 + if (p) 226 + eventfd_signal(p->eventfd, 1); 227 + mutex_unlock(&client->vm->ioeventfds_lock); 228 + 229 + return 0; 230 + } 231 + 232 + int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args) 233 + { 234 + int ret; 235 + 236 + if (args->flags & ACRN_IOEVENTFD_FLAG_DEASSIGN) 237 + ret = acrn_ioeventfd_deassign(vm, args); 238 + else 239 + ret = acrn_ioeventfd_assign(vm, args); 240 + 241 + return ret; 242 + } 243 + 244 + int acrn_ioeventfd_init(struct acrn_vm *vm) 245 + { 246 + char name[ACRN_NAME_LEN]; 247 + 248 + mutex_init(&vm->ioeventfds_lock); 249 + INIT_LIST_HEAD(&vm->ioeventfds); 250 + snprintf(name, sizeof(name), "ioeventfd-%u", vm->vmid); 251 + vm->ioeventfd_client = acrn_ioreq_client_create(vm, 252 + acrn_ioeventfd_handler, 253 + NULL, 
false, name); 254 + if (!vm->ioeventfd_client) { 255 + dev_err(acrn_dev.this_device, "Failed to create ioeventfd ioreq client!\n"); 256 + return -EINVAL; 257 + } 258 + 259 + dev_dbg(acrn_dev.this_device, "VM %u ioeventfd init.\n", vm->vmid); 260 + return 0; 261 + } 262 + 263 + void acrn_ioeventfd_deinit(struct acrn_vm *vm) 264 + { 265 + struct hsm_ioeventfd *p, *next; 266 + 267 + dev_dbg(acrn_dev.this_device, "VM %u ioeventfd deinit.\n", vm->vmid); 268 + acrn_ioreq_client_destroy(vm->ioeventfd_client); 269 + mutex_lock(&vm->ioeventfds_lock); 270 + list_for_each_entry_safe(p, next, &vm->ioeventfds, list) 271 + acrn_ioeventfd_shutdown(vm, p); 272 + mutex_unlock(&vm->ioeventfds_lock); 273 + }
+2
drivers/virt/acrn/vm.c
··· 50 50 list_add(&vm->list, &acrn_vm_list); 51 51 write_unlock_bh(&acrn_vm_list_lock); 52 52 53 + acrn_ioeventfd_init(vm); 53 54 dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid); 54 55 return vm; 55 56 } ··· 68 67 list_del_init(&vm->list); 69 68 write_unlock_bh(&acrn_vm_list_lock); 70 69 70 + acrn_ioeventfd_deinit(vm); 71 71 acrn_ioreq_deinit(vm); 72 72 if (vm->monitor_page) { 73 73 put_page(vm->monitor_page);
+29
include/uapi/linux/acrn.h
··· 479 479 ACRN_PMCMD_GET_CX_DATA, 480 480 }; 481 481 482 + #define ACRN_IOEVENTFD_FLAG_PIO 0x01 483 + #define ACRN_IOEVENTFD_FLAG_DATAMATCH 0x02 484 + #define ACRN_IOEVENTFD_FLAG_DEASSIGN 0x04 485 + /** 486 + * struct acrn_ioeventfd - Data to operate a &struct hsm_ioeventfd 487 + * @fd: The fd of eventfd associated with a hsm_ioeventfd 488 + * @flags: Logical-OR of ACRN_IOEVENTFD_FLAG_* 489 + * @addr: The start address of IO range of ioeventfd 490 + * @len: The length of IO range of ioeventfd 491 + * @reserved: Reserved and should be 0 492 + * @data: Data for data matching 493 + * 494 + * Without flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl ACRN_IOCTL_IOEVENTFD 495 + * creates a &struct hsm_ioeventfd with properties originated from &struct 496 + * acrn_ioeventfd. With flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl 497 + * ACRN_IOCTL_IOEVENTFD destroys the &struct hsm_ioeventfd matching the fd. 498 + */ 499 + struct acrn_ioeventfd { 500 + __u32 fd; 501 + __u32 flags; 502 + __u64 addr; 503 + __u32 len; 504 + __u32 reserved; 505 + __u64 data; 506 + }; 507 + 482 508 /* The ioctl type, documented in ioctl-number.rst */ 483 509 #define ACRN_IOCTL_TYPE 0xA2 484 510 ··· 558 532 559 533 #define ACRN_IOCTL_PM_GET_CPU_STATE \ 560 534 _IOWR(ACRN_IOCTL_TYPE, 0x60, __u64) 535 + 536 + #define ACRN_IOCTL_IOEVENTFD \ 537 + _IOW(ACRN_IOCTL_TYPE, 0x70, struct acrn_ioeventfd) 561 538 562 539 #endif /* _UAPI_ACRN_H */