Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

VMCI: Add support for virtual IOMMU

This patch adds support for virtual IOMMU to the vmci module. We switch
to DMA consistent mappings for guest queuepair and doorbell pages that
are passed to the device. We still allocate each page individually,
since there's no guarantee that we'll get a contiguous block of physical
memory for an entire queuepair (especially since we allow up to 128 MiB!).

Also made the split between guest and host in the kernelIf struct much
clearer. Now it's obvious which fields are which.

Acked-by: George Zhang <georgezhang@vmware.com>
Acked-by: Aditya Sarwade <asarwade@vmware.com>
Signed-off-by: Andy King <acking@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Andy King and committed by
Greg Kroah-Hartman
6d6dfb4f 45412bef

+127 -82
+1 -1
drivers/misc/vmw_vmci/vmci_driver.c
··· 113 113 114 114 MODULE_AUTHOR("VMware, Inc."); 115 115 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); 116 - MODULE_VERSION("1.0.0.0-k"); 116 + MODULE_VERSION("1.1.0.0-k"); 117 117 MODULE_LICENSE("GPL v2");
+7
drivers/misc/vmw_vmci/vmci_driver.h
··· 35 35 enum vmci_obj_type type; 36 36 }; 37 37 38 + /* 39 + * Needed by other components of this module. It's okay to have one global 40 + * instance of this because there can only ever be one VMCI device. Our 41 + * virtual hardware enforces this. 42 + */ 43 + extern struct pci_dev *vmci_pdev; 44 + 38 45 u32 vmci_get_context_id(void); 39 46 int vmci_send_datagram(struct vmci_datagram *dg); 40 47
+16 -6
drivers/misc/vmw_vmci/vmci_guest.c
··· 65 65 66 66 void *data_buffer; 67 67 void *notification_bitmap; 68 + dma_addr_t notification_base; 68 69 }; 69 70 70 71 /* vmci_dev singleton device and supporting data*/ 72 + struct pci_dev *vmci_pdev; 71 73 static struct vmci_guest_device *vmci_dev_g; 72 74 static DEFINE_SPINLOCK(vmci_dev_spinlock); 73 75 ··· 530 528 * well. 531 529 */ 532 530 if (capabilities & VMCI_CAPS_NOTIFICATIONS) { 533 - vmci_dev->notification_bitmap = vmalloc(PAGE_SIZE); 531 + vmci_dev->notification_bitmap = dma_alloc_coherent( 532 + &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, 533 + GFP_KERNEL); 534 534 if (!vmci_dev->notification_bitmap) { 535 535 dev_warn(&pdev->dev, 536 536 "Unable to allocate notification bitmap\n"); ··· 550 546 /* Set up global device so that we can start sending datagrams */ 551 547 spin_lock_irq(&vmci_dev_spinlock); 552 548 vmci_dev_g = vmci_dev; 549 + vmci_pdev = pdev; 553 550 spin_unlock_irq(&vmci_dev_spinlock); 554 551 555 552 /* ··· 558 553 * used. 559 554 */ 560 555 if (capabilities & VMCI_CAPS_NOTIFICATIONS) { 561 - struct page *page = 562 - vmalloc_to_page(vmci_dev->notification_bitmap); 563 - unsigned long bitmap_ppn = page_to_pfn(page); 556 + unsigned long bitmap_ppn = 557 + vmci_dev->notification_base >> PAGE_SHIFT; 564 558 if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { 565 559 dev_warn(&pdev->dev, 566 560 "VMCI device unable to register notification bitmap with PPN 0x%x\n", ··· 669 665 if (vmci_dev->notification_bitmap) { 670 666 iowrite32(VMCI_CONTROL_RESET, 671 667 vmci_dev->iobase + VMCI_CONTROL_ADDR); 672 - vfree(vmci_dev->notification_bitmap); 668 + dma_free_coherent(&pdev->dev, PAGE_SIZE, 669 + vmci_dev->notification_bitmap, 670 + vmci_dev->notification_base); 673 671 } 674 672 675 673 err_remove_vmci_dev_g: 676 674 spin_lock_irq(&vmci_dev_spinlock); 675 + vmci_pdev = NULL; 677 676 vmci_dev_g = NULL; 678 677 spin_unlock_irq(&vmci_dev_spinlock); 679 678 ··· 706 699 707 700 spin_lock_irq(&vmci_dev_spinlock); 708 701 
vmci_dev_g = NULL; 702 + vmci_pdev = NULL; 709 703 spin_unlock_irq(&vmci_dev_spinlock); 710 704 711 705 dev_dbg(&pdev->dev, "Resetting vmci device\n"); ··· 735 727 * device, so we can safely free it here. 736 728 */ 737 729 738 - vfree(vmci_dev->notification_bitmap); 730 + dma_free_coherent(&pdev->dev, PAGE_SIZE, 731 + vmci_dev->notification_bitmap, 732 + vmci_dev->notification_base); 739 733 } 740 734 741 735 vfree(vmci_dev->data_buffer);
+103 -75
drivers/misc/vmw_vmci/vmci_queue_pair.c
··· 21 21 #include <linux/module.h> 22 22 #include <linux/mutex.h> 23 23 #include <linux/pagemap.h> 24 + #include <linux/pci.h> 24 25 #include <linux/sched.h> 25 26 #include <linux/slab.h> 26 27 #include <linux/uio.h> ··· 147 146 148 147 /* The Kernel specific component of the struct vmci_queue structure. */ 149 148 struct vmci_queue_kern_if { 150 - struct page **page; 151 - struct page **header_page; 152 149 struct mutex __mutex; /* Protects the queue. */ 153 150 struct mutex *mutex; /* Shared by producer and consumer queues. */ 154 - bool host; 155 - size_t num_pages; 151 + size_t num_pages; /* Number of pages incl. header. */ 152 + bool host; /* Host or guest? */ 153 + union { 154 + struct { 155 + dma_addr_t *pas; 156 + void **vas; 157 + } g; /* Used by the guest. */ 158 + struct { 159 + struct page **page; 160 + struct page **header_page; 161 + } h; /* Used by the host. */ 162 + } u; 156 163 }; 157 164 158 165 /* ··· 272 263 struct vmci_queue *queue = q; 273 264 274 265 if (queue) { 275 - u64 i = DIV_ROUND_UP(size, PAGE_SIZE); 266 + u64 i; 276 267 277 - while (i) 278 - __free_page(queue->kernel_if->page[--i]); 268 + /* Given size does not include header, so add in a page here. */ 269 + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) { 270 + dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE, 271 + queue->kernel_if->u.g.vas[i], 272 + queue->kernel_if->u.g.pas[i]); 273 + } 279 274 280 - vfree(queue->q_header); 275 + vfree(queue); 281 276 } 282 277 } 283 278 284 279 /* 285 - * Allocates kernel VA space of specified size, plus space for the 286 - * queue structure/kernel interface and the queue header. Allocates 287 - * physical pages for the queue data pages. 
288 - * 289 - * PAGE m: struct vmci_queue_header (struct vmci_queue->q_header) 290 - * PAGE m+1: struct vmci_queue 291 - * PAGE m+1+q: struct vmci_queue_kern_if (struct vmci_queue->kernel_if) 292 - * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[]) 280 + * Allocates kernel queue pages of specified size with IOMMU mappings, 281 + * plus space for the queue structure/kernel interface and the queue 282 + * header. 293 283 */ 294 284 static void *qp_alloc_queue(u64 size, u32 flags) 295 285 { 296 286 u64 i; 297 287 struct vmci_queue *queue; 298 - struct vmci_queue_header *q_header; 299 - const u64 num_data_pages = DIV_ROUND_UP(size, PAGE_SIZE); 300 - const uint queue_size = 301 - PAGE_SIZE + 302 - sizeof(*queue) + sizeof(*(queue->kernel_if)) + 303 - num_data_pages * sizeof(*(queue->kernel_if->page)); 288 + const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; 289 + const size_t pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas); 290 + const size_t vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas); 291 + const size_t queue_size = 292 + sizeof(*queue) + sizeof(*queue->kernel_if) + 293 + pas_size + vas_size; 304 294 305 - q_header = vmalloc(queue_size); 306 - if (!q_header) 295 + queue = vmalloc(queue_size); 296 + if (!queue) 307 297 return NULL; 308 298 309 - queue = (void *)q_header + PAGE_SIZE; 310 - queue->q_header = q_header; 299 + queue->q_header = NULL; 311 300 queue->saved_header = NULL; 312 301 queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); 313 - queue->kernel_if->header_page = NULL; /* Unused in guest. 
*/ 314 - queue->kernel_if->page = (struct page **)(queue->kernel_if + 1); 302 + queue->kernel_if->mutex = NULL; 303 + queue->kernel_if->num_pages = num_pages; 304 + queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1); 305 + queue->kernel_if->u.g.vas = 306 + (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size); 315 307 queue->kernel_if->host = false; 316 308 317 - for (i = 0; i < num_data_pages; i++) { 318 - queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0); 319 - if (!queue->kernel_if->page[i]) 320 - goto fail; 309 + for (i = 0; i < num_pages; i++) { 310 + queue->kernel_if->u.g.vas[i] = 311 + dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE, 312 + &queue->kernel_if->u.g.pas[i], 313 + GFP_KERNEL); 314 + if (!queue->kernel_if->u.g.vas[i]) { 315 + /* Size excl. the header. */ 316 + qp_free_queue(queue, i * PAGE_SIZE); 317 + return NULL; 318 + } 321 319 } 322 320 323 - return (void *)queue; 321 + /* Queue header is the first page. */ 322 + queue->q_header = queue->kernel_if->u.g.vas[0]; 324 323 325 - fail: 326 - qp_free_queue(queue, i * PAGE_SIZE); 327 - return NULL; 324 + return queue; 328 325 } 329 326 330 327 /* ··· 349 334 size_t bytes_copied = 0; 350 335 351 336 while (bytes_copied < size) { 352 - u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; 353 - size_t page_offset = 337 + const u64 page_index = 338 + (queue_offset + bytes_copied) / PAGE_SIZE; 339 + const size_t page_offset = 354 340 (queue_offset + bytes_copied) & (PAGE_SIZE - 1); 355 341 void *va; 356 342 size_t to_copy; 357 343 358 - va = kmap(kernel_if->page[page_index]); 344 + if (kernel_if->host) 345 + va = kmap(kernel_if->u.h.page[page_index]); 346 + else 347 + va = kernel_if->u.g.vas[page_index + 1]; 348 + /* Skip header. */ 359 349 360 350 if (size - bytes_copied > PAGE_SIZE - page_offset) 361 351 /* Enough payload to fill up from this page. 
*/ ··· 376 356 err = memcpy_fromiovec((u8 *)va + page_offset, 377 357 iov, to_copy); 378 358 if (err != 0) { 379 - kunmap(kernel_if->page[page_index]); 359 + if (kernel_if->host) 360 + kunmap(kernel_if->u.h.page[page_index]); 380 361 return VMCI_ERROR_INVALID_ARGS; 381 362 } 382 363 } else { ··· 386 365 } 387 366 388 367 bytes_copied += to_copy; 389 - kunmap(kernel_if->page[page_index]); 368 + if (kernel_if->host) 369 + kunmap(kernel_if->u.h.page[page_index]); 390 370 } 391 371 392 372 return VMCI_SUCCESS; ··· 409 387 size_t bytes_copied = 0; 410 388 411 389 while (bytes_copied < size) { 412 - u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; 413 - size_t page_offset = 390 + const u64 page_index = 391 + (queue_offset + bytes_copied) / PAGE_SIZE; 392 + const size_t page_offset = 414 393 (queue_offset + bytes_copied) & (PAGE_SIZE - 1); 415 394 void *va; 416 395 size_t to_copy; 417 396 418 - va = kmap(kernel_if->page[page_index]); 397 + if (kernel_if->host) 398 + va = kmap(kernel_if->u.h.page[page_index]); 399 + else 400 + va = kernel_if->u.g.vas[page_index + 1]; 401 + /* Skip header. */ 419 402 420 403 if (size - bytes_copied > PAGE_SIZE - page_offset) 421 404 /* Enough payload to fill up this page. 
*/ ··· 436 409 err = memcpy_toiovec(iov, (u8 *)va + page_offset, 437 410 to_copy); 438 411 if (err != 0) { 439 - kunmap(kernel_if->page[page_index]); 412 + if (kernel_if->host) 413 + kunmap(kernel_if->u.h.page[page_index]); 440 414 return VMCI_ERROR_INVALID_ARGS; 441 415 } 442 416 } else { ··· 446 418 } 447 419 448 420 bytes_copied += to_copy; 449 - kunmap(kernel_if->page[page_index]); 421 + if (kernel_if->host) 422 + kunmap(kernel_if->u.h.page[page_index]); 450 423 } 451 424 452 425 return VMCI_SUCCESS; ··· 489 460 return VMCI_ERROR_NO_MEM; 490 461 } 491 462 492 - produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header)); 493 - for (i = 1; i < num_produce_pages; i++) { 463 + for (i = 0; i < num_produce_pages; i++) { 494 464 unsigned long pfn; 495 465 496 466 produce_ppns[i] = 497 - page_to_pfn(produce_q->kernel_if->page[i - 1]); 467 + produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; 498 468 pfn = produce_ppns[i]; 499 469 500 470 /* Fail allocation if PFN isn't supported by hypervisor. */ ··· 502 474 goto ppn_error; 503 475 } 504 476 505 - consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header)); 506 - for (i = 1; i < num_consume_pages; i++) { 477 + for (i = 0; i < num_consume_pages; i++) { 507 478 unsigned long pfn; 508 479 509 480 consume_ppns[i] = 510 - page_to_pfn(consume_q->kernel_if->page[i - 1]); 481 + consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; 511 482 pfn = consume_ppns[i]; 512 483 513 484 /* Fail allocation if PFN isn't supported by hypervisor. 
*/ ··· 617 590 const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; 618 591 const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); 619 592 const size_t queue_page_size = 620 - num_pages * sizeof(*queue->kernel_if->page); 593 + num_pages * sizeof(*queue->kernel_if->u.h.page); 621 594 622 595 queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); 623 596 if (queue) { 624 597 queue->q_header = NULL; 625 598 queue->saved_header = NULL; 626 - queue->kernel_if = 627 - (struct vmci_queue_kern_if *)((u8 *)queue + 628 - sizeof(*queue)); 599 + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); 629 600 queue->kernel_if->host = true; 630 601 queue->kernel_if->mutex = NULL; 631 602 queue->kernel_if->num_pages = num_pages; 632 - queue->kernel_if->header_page = 603 + queue->kernel_if->u.h.header_page = 633 604 (struct page **)((u8 *)queue + queue_size); 634 - queue->kernel_if->page = &queue->kernel_if->header_page[1]; 605 + queue->kernel_if->u.h.page = 606 + &queue->kernel_if->u.h.header_page[1]; 635 607 } 636 608 637 609 return queue; ··· 737 711 current->mm, 738 712 (uintptr_t) produce_uva, 739 713 produce_q->kernel_if->num_pages, 740 - 1, 0, produce_q->kernel_if->header_page, NULL); 714 + 1, 0, 715 + produce_q->kernel_if->u.h.header_page, NULL); 741 716 if (retval < produce_q->kernel_if->num_pages) { 742 717 pr_warn("get_user_pages(produce) failed (retval=%d)", retval); 743 - qp_release_pages(produce_q->kernel_if->header_page, retval, 744 - false); 718 + qp_release_pages(produce_q->kernel_if->u.h.header_page, 719 + retval, false); 745 720 err = VMCI_ERROR_NO_MEM; 746 721 goto out; 747 722 } ··· 751 724 current->mm, 752 725 (uintptr_t) consume_uva, 753 726 consume_q->kernel_if->num_pages, 754 - 1, 0, consume_q->kernel_if->header_page, NULL); 727 + 1, 0, 728 + consume_q->kernel_if->u.h.header_page, NULL); 755 729 if (retval < consume_q->kernel_if->num_pages) { 756 730 pr_warn("get_user_pages(consume) failed (retval=%d)", retval); 757 - 
qp_release_pages(consume_q->kernel_if->header_page, retval, 758 - false); 759 - qp_release_pages(produce_q->kernel_if->header_page, 731 + qp_release_pages(consume_q->kernel_if->u.h.header_page, 732 + retval, false); 733 + qp_release_pages(produce_q->kernel_if->u.h.header_page, 760 734 produce_q->kernel_if->num_pages, false); 761 735 err = VMCI_ERROR_NO_MEM; 762 736 } ··· 800 772 static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, 801 773 struct vmci_queue *consume_q) 802 774 { 803 - qp_release_pages(produce_q->kernel_if->header_page, 775 + qp_release_pages(produce_q->kernel_if->u.h.header_page, 804 776 produce_q->kernel_if->num_pages, true); 805 - memset(produce_q->kernel_if->header_page, 0, 806 - sizeof(*produce_q->kernel_if->header_page) * 777 + memset(produce_q->kernel_if->u.h.header_page, 0, 778 + sizeof(*produce_q->kernel_if->u.h.header_page) * 807 779 produce_q->kernel_if->num_pages); 808 - qp_release_pages(consume_q->kernel_if->header_page, 780 + qp_release_pages(consume_q->kernel_if->u.h.header_page, 809 781 consume_q->kernel_if->num_pages, true); 810 - memset(consume_q->kernel_if->header_page, 0, 811 - sizeof(*consume_q->kernel_if->header_page) * 782 + memset(consume_q->kernel_if->u.h.header_page, 0, 783 + sizeof(*consume_q->kernel_if->u.h.header_page) * 812 784 consume_q->kernel_if->num_pages); 813 785 } 814 786 ··· 831 803 if (produce_q->q_header != consume_q->q_header) 832 804 return VMCI_ERROR_QUEUEPAIR_MISMATCH; 833 805 834 - if (produce_q->kernel_if->header_page == NULL || 835 - *produce_q->kernel_if->header_page == NULL) 806 + if (produce_q->kernel_if->u.h.header_page == NULL || 807 + *produce_q->kernel_if->u.h.header_page == NULL) 836 808 return VMCI_ERROR_UNAVAILABLE; 837 809 838 - headers[0] = *produce_q->kernel_if->header_page; 839 - headers[1] = *consume_q->kernel_if->header_page; 810 + headers[0] = *produce_q->kernel_if->u.h.header_page; 811 + headers[1] = *consume_q->kernel_if->u.h.header_page; 840 812 841 813 
produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); 842 814 if (produce_q->q_header != NULL) {