Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio: selftests: Add iommufd mode

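Add a new IOMMU mode for using iommufd directly. In this mode, userspace
opens /dev/iommu and binds it to a device FD acquired through
/dev/vfio/devices/vfioX, instead of going through a VFIO group and
container. The device is then attached to a newly allocated IOAS, which
is used for all subsequent DMA mappings.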

Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20250822212518.4156428-29-dmatlack@google.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

authored by

David Matlack and committed by
Alex Williamson
61cbfe50 d1a17495

+149 -38
+5 -1
tools/testing/selftests/vfio/lib/include/vfio_util.h
··· 62 62 FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \ 63 63 FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \ 64 64 FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \ 65 - FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__) 65 + FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \ 66 + FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__) 66 67 67 68 struct vfio_pci_bar { 68 69 struct vfio_region_info info; ··· 166 165 const struct vfio_iommu_mode *iommu_mode; 167 166 int group_fd; 168 167 int container_fd; 168 + 169 + int iommufd; 170 + u32 ioas_id; 169 171 170 172 struct vfio_device_info info; 171 173 struct vfio_region_info config_space;
+144 -37
tools/testing/selftests/vfio/lib/vfio_pci_device.c
··· 10 10 #include <sys/ioctl.h> 11 11 #include <sys/mman.h> 12 12 13 + #include <uapi/linux/types.h> 13 14 #include <linux/limits.h> 14 15 #include <linux/mman.h> 15 16 #include <linux/types.h> 16 17 #include <linux/vfio.h> 18 + #include <linux/iommufd.h> 17 19 18 20 #include "../../../kselftest.h" 19 21 #include <vfio_util.h> ··· 141 139 ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); 142 140 } 143 141 144 - void vfio_pci_dma_map(struct vfio_pci_device *device, 145 - struct vfio_dma_region *region) 142 + static void vfio_iommu_dma_map(struct vfio_pci_device *device, 143 + struct vfio_dma_region *region) 146 144 { 147 - struct vfio_iommu_type1_dma_map map = { 148 - .argsz = sizeof(map), 145 + struct vfio_iommu_type1_dma_map args = { 146 + .argsz = sizeof(args), 149 147 .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, 150 148 .vaddr = (u64)region->vaddr, 151 149 .iova = region->iova, 152 150 .size = region->size, 153 151 }; 154 152 155 - ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &map); 153 + ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args); 154 + } 155 + 156 + static void iommufd_dma_map(struct vfio_pci_device *device, 157 + struct vfio_dma_region *region) 158 + { 159 + struct iommu_ioas_map args = { 160 + .size = sizeof(args), 161 + .flags = IOMMU_IOAS_MAP_READABLE | 162 + IOMMU_IOAS_MAP_WRITEABLE | 163 + IOMMU_IOAS_MAP_FIXED_IOVA, 164 + .user_va = (u64)region->vaddr, 165 + .iova = region->iova, 166 + .length = region->size, 167 + .ioas_id = device->ioas_id, 168 + }; 169 + 170 + ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args); 171 + } 172 + 173 + void vfio_pci_dma_map(struct vfio_pci_device *device, 174 + struct vfio_dma_region *region) 175 + { 176 + if (device->iommufd) 177 + iommufd_dma_map(device, region); 178 + else 179 + vfio_iommu_dma_map(device, region); 156 180 157 181 list_add(&region->link, &device->dma_regions); 182 + } 183 + 184 + static void vfio_iommu_dma_unmap(struct vfio_pci_device *device, 185 
+ struct vfio_dma_region *region) 186 + { 187 + struct vfio_iommu_type1_dma_unmap args = { 188 + .argsz = sizeof(args), 189 + .iova = region->iova, 190 + .size = region->size, 191 + }; 192 + 193 + ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args); 194 + } 195 + 196 + static void iommufd_dma_unmap(struct vfio_pci_device *device, 197 + struct vfio_dma_region *region) 198 + { 199 + struct iommu_ioas_unmap args = { 200 + .size = sizeof(args), 201 + .iova = region->iova, 202 + .length = region->size, 203 + .ioas_id = device->ioas_id, 204 + }; 205 + 206 + ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args); 158 207 } 159 208 160 209 void vfio_pci_dma_unmap(struct vfio_pci_device *device, 161 210 struct vfio_dma_region *region) 162 211 { 163 - struct vfio_iommu_type1_dma_unmap unmap = { 164 - .argsz = sizeof(unmap), 165 - .iova = region->iova, 166 - .size = region->size, 167 - }; 168 - 169 - ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap); 212 + if (device->iommufd) 213 + iommufd_dma_unmap(device, region); 214 + else 215 + vfio_iommu_dma_unmap(device, region); 170 216 171 217 list_del(&region->link); 172 218 } ··· 308 258 return group; 309 259 } 310 260 311 - static void vfio_pci_container_setup(struct vfio_pci_device *device) 312 - { 313 - const char *path = device->iommu_mode->container_path; 314 - int version; 315 - 316 - device->container_fd = open(path, O_RDWR); 317 - VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path); 318 - 319 - version = ioctl(device->container_fd, VFIO_GET_API_VERSION); 320 - VFIO_ASSERT_EQ(version, VFIO_API_VERSION); 321 - } 322 - 323 261 static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf) 324 262 { 325 263 struct vfio_group_status group_status = { ··· 328 290 ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd); 329 291 } 330 292 331 - static void vfio_pci_iommu_setup(struct vfio_pci_device *device) 293 + static void 
vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf) 332 294 { 333 295 unsigned long iommu_type = device->iommu_mode->iommu_type; 296 + const char *path = device->iommu_mode->container_path; 297 + int version; 334 298 int ret; 335 299 336 - INIT_LIST_HEAD(&device->dma_regions); 300 + device->container_fd = open(path, O_RDWR); 301 + VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path); 302 + 303 + version = ioctl(device->container_fd, VFIO_GET_API_VERSION); 304 + VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version); 305 + 306 + vfio_pci_group_setup(device, bdf); 337 307 338 308 ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type); 339 309 VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type); 340 310 341 311 ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type); 342 - } 343 - 344 - static void vfio_pci_device_setup(struct vfio_pci_device *device, const char *bdf) 345 - { 346 - int i; 347 312 348 313 device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf); 349 314 VFIO_ASSERT_GE(device->fd, 0); 315 + } 316 + 317 + static void vfio_pci_device_setup(struct vfio_pci_device *device) 318 + { 319 + int i; 350 320 351 321 device->info.argsz = sizeof(device->info); 352 322 ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info); ··· 432 386 .container_path = "/dev/iommu", 433 387 .iommu_type = VFIO_TYPE1v2_IOMMU, 434 388 }, 389 + { 390 + .name = "iommufd", 391 + }, 435 392 }; 436 393 437 394 const char *default_iommu_mode = "vfio_type1_iommu"; ··· 456 407 VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode); 457 408 } 458 409 410 + static void vfio_device_bind_iommufd(int device_fd, int iommufd) 411 + { 412 + struct vfio_device_bind_iommufd args = { 413 + .argsz = sizeof(args), 414 + .iommufd = iommufd, 415 + }; 416 + 417 + ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args); 418 + } 419 + 420 + static u32 iommufd_ioas_alloc(int iommufd) 
421 + { 422 + struct iommu_ioas_alloc args = { 423 + .size = sizeof(args), 424 + }; 425 + 426 + ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args); 427 + return args.out_ioas_id; 428 + } 429 + 430 + static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id) 431 + { 432 + struct vfio_device_attach_iommufd_pt args = { 433 + .argsz = sizeof(args), 434 + .pt_id = pt_id, 435 + }; 436 + 437 + ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args); 438 + } 439 + 440 + static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf) 441 + { 442 + const char *cdev_path = vfio_pci_get_cdev_path(bdf); 443 + 444 + device->fd = open(cdev_path, O_RDWR); 445 + VFIO_ASSERT_GE(device->fd, 0); 446 + free((void *)cdev_path); 447 + 448 + /* 449 + * Require device->iommufd to be >0 so that a simple non-0 check can be 450 + * used to check if iommufd is enabled. In practice open() will never 451 + * return 0 unless stdin is closed. 452 + */ 453 + device->iommufd = open("/dev/iommu", O_RDWR); 454 + VFIO_ASSERT_GT(device->iommufd, 0); 455 + 456 + vfio_device_bind_iommufd(device->fd, device->iommufd); 457 + device->ioas_id = iommufd_ioas_alloc(device->iommufd); 458 + vfio_device_attach_iommufd_pt(device->fd, device->ioas_id); 459 + } 460 + 459 461 struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode) 460 462 { 461 463 struct vfio_pci_device *device; ··· 514 414 device = calloc(1, sizeof(*device)); 515 415 VFIO_ASSERT_NOT_NULL(device); 516 416 417 + INIT_LIST_HEAD(&device->dma_regions); 418 + 517 419 device->iommu_mode = lookup_iommu_mode(iommu_mode); 518 420 519 - vfio_pci_container_setup(device); 520 - vfio_pci_group_setup(device, bdf); 521 - vfio_pci_iommu_setup(device); 522 - vfio_pci_device_setup(device, bdf); 421 + if (device->iommu_mode->container_path) 422 + vfio_pci_container_setup(device, bdf); 423 + else 424 + vfio_pci_iommufd_setup(device, bdf); 523 425 426 + vfio_pci_device_setup(device); 524 427 
vfio_pci_driver_probe(device); 525 428 526 429 return device; ··· 547 444 VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); 548 445 } 549 446 550 - VFIO_ASSERT_EQ(close(device->group_fd), 0); 551 - VFIO_ASSERT_EQ(close(device->container_fd), 0); 447 + if (device->iommufd) { 448 + VFIO_ASSERT_EQ(close(device->iommufd), 0); 449 + } else { 450 + VFIO_ASSERT_EQ(close(device->group_fd), 0); 451 + VFIO_ASSERT_EQ(close(device->container_fd), 0); 452 + } 552 453 553 454 free(device); 554 455 }