Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull hmm updates from Jason Gunthorpe:
"This series adds a selftest for hmm_range_fault() and several of the
DEVICE_PRIVATE migration related actions, and another simplification
for hmm_range_fault()'s API.

- Simplify hmm_range_fault() with a simpler return code, no
HMM_PFN_SPECIAL, and no customizable output PFN format

- Add a selftest for hmm_range_fault() and DEVICE_PRIVATE related
functionality"

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
MAINTAINERS: add HMM selftests
mm/hmm/test: add selftests for HMM
mm/hmm/test: add selftest driver for HMM
mm/hmm: remove the customizable pfn format from hmm_range_fault
mm/hmm: remove HMM_PFN_SPECIAL
drm/amdgpu: remove dead code after hmm_range_fault()
mm/hmm: make hmm_range_fault return 0 or -1
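
For readers following along, the net effect of these commits on a hmm_range_fault() caller looks roughly like the sketch below. This is illustrative only, pieced together from the Documentation/vm/hmm.rst, include/linux/hmm.h and lib/test_hmm.c hunks in this merge; driver_lock and driver_update_pte() are hypothetical stand-ins for a real driver's page-table lock and update path.

    #include <linux/hmm.h>
    #include <linux/mutex.h>

    static DEFINE_MUTEX(driver_lock);           /* hypothetical driver lock */

    /* Hypothetical driver fault path against the reworked API (sketch only). */
    static int driver_populate_range(struct mmu_interval_notifier *notifier,
                                     struct mm_struct *mm, unsigned long start,
                                     unsigned long npages, unsigned long *hmm_pfns)
    {
            struct hmm_range range = {
                    .notifier = notifier,
                    .start = start,
                    .end = start + (npages << PAGE_SHIFT),
                    .hmm_pfns = hmm_pfns,        /* was .pfns/.flags/.values/.pfn_shift */
                    .default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
                    .pfn_flags_mask = 0,
            };
            unsigned long i;
            int ret;

            /* A real driver also bounds this loop with a timeout, see the
             * amdgpu and nouveau hunks below. */
            while (true) {
                    range.notifier_seq = mmu_interval_read_begin(notifier);
                    down_read(&mm->mmap_sem);
                    ret = hmm_range_fault(&range);  /* now 0 or -errno, not a count */
                    up_read(&mm->mmap_sem);
                    if (ret) {
                            if (ret == -EBUSY)
                                    continue;       /* raced with an invalidation */
                            return ret;
                    }
                    mutex_lock(&driver_lock);
                    if (mmu_interval_read_retry(notifier, range.notifier_seq)) {
                            mutex_unlock(&driver_lock);
                            continue;
                    }
                    break;
            }

            /* default_flags guarantees every entry is HMM_PFN_VALID here. */
            for (i = 0; i < npages; i++)
                    driver_update_pte(i, hmm_pfn_to_page(hmm_pfns[i]),
                                      hmm_pfns[i] & HMM_PFN_WRITE);
            mutex_unlock(&driver_lock);
            return 0;
    }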

Total: +2934 -289
Documentation/vm/hmm.rst (+11 -19)
···
 When the device driver wants to populate a range of virtual addresses, it can
 use::

-  long hmm_range_fault(struct hmm_range *range);
+  int hmm_range_fault(struct hmm_range *range);

 It will trigger a page fault on missing or read-only entries if write access is
 requested (see below). Page faults use the generic mm page fault code path just
···
      range.notifier = &interval_sub;
      range.start = ...;
      range.end = ...;
-     range.pfns = ...;
-     range.flags = ...;
-     range.values = ...;
-     range.pfn_shift = ...;
+     range.hmm_pfns = ...;

      if (!mmget_not_zero(interval_sub->notifier.mm))
          return -EFAULT;
···
 fault or snapshot policy for the whole range instead of having to set them
 for each entry in the pfns array.

-For instance, if the device flags for range.flags are::
+For instance if the device driver wants pages for a range with at least read
+permission, it sets::

-    range.flags[HMM_PFN_VALID] = (1 << 63);
-    range.flags[HMM_PFN_WRITE] = (1 << 62);
-
-and the device driver wants pages for a range with at least read permission,
-it sets::
-
-    range->default_flags = (1 << 63);
+    range->default_flags = HMM_PFN_REQ_FAULT;
     range->pfn_flags_mask = 0;

 and calls hmm_range_fault() as described above. This will fill fault all pages
···
 Now let's say the driver wants to do the same except for one page in the range for
 which it wants to have write permission. Now driver set::

-    range->default_flags = (1 << 63);
-    range->pfn_flags_mask = (1 << 62);
-    range->pfns[index_of_write] = (1 << 62);
+    range->default_flags = HMM_PFN_REQ_FAULT;
+    range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+    range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;

 With this, HMM will fault in all pages with at least read (i.e., valid) and for the
 address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
 write permission i.e., if the CPU pte does not have write permission set then HMM
 will call handle_mm_fault().

-Note that HMM will populate the pfns array with write permission for any page
-that is mapped with CPU write permission no matter what values are set
-in default_flags or pfn_flags_mask.
+After hmm_range_fault completes the flag bits are set to the current state of
+the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
+writable.


 Represent and manage device memory from core kernel point of view
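As a cross-reference, the per-entry request that hmm_range_fault() actually evaluates is built the same way in the mm/hmm.c hunk further down: the per-entry input flags are masked by pfn_flags_mask and then OR'ed with default_flags. A minimal sketch of that combination (the helper name is illustrative):

    /* Sketch: how a per-entry request is derived (mirrors hmm_pte_need_fault()). */
    static unsigned long hmm_effective_request(const struct hmm_range *range,
                                               unsigned long hmm_pfn_entry)
    {
            return (hmm_pfn_entry & range->pfn_flags_mask) | range->default_flags;
    }

With the documentation example above, default_flags = HMM_PFN_REQ_FAULT and pfn_flags_mask = HMM_PFN_REQ_WRITE, so every entry requests at least a readable fault and only pfns[index_of_write] additionally requests write access.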
MAINTAINERS (+2)
···
 S:	Maintained
 F:	Documentation/vm/hmm.rst
 F:	include/linux/hmm*
+F:	lib/test_hmm*
 F:	mm/hmm*
+F:	tools/testing/selftests/vm/*hmm*

 HOST AP DRIVER
 M:	Jouni Malinen <j@w1.fi>
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c (+18 -38)
··· 766 766 }; 767 767 768 768 #ifdef CONFIG_DRM_AMDGPU_USERPTR 769 - /* flags used by HMM internal, not related to CPU/GPU PTE flags */ 770 - static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { 771 - (1 << 0), /* HMM_PFN_VALID */ 772 - (1 << 1), /* HMM_PFN_WRITE */ 773 - }; 774 - 775 - static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { 776 - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ 777 - 0, /* HMM_PFN_NONE */ 778 - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ 779 - }; 780 - 781 769 /** 782 770 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user 783 771 * memory and start HMM tracking CPU page table update ··· 804 816 goto out; 805 817 } 806 818 range->notifier = &bo->notifier; 807 - range->flags = hmm_range_flags; 808 - range->values = hmm_range_values; 809 - range->pfn_shift = PAGE_SHIFT; 810 819 range->start = bo->notifier.interval_tree.start; 811 820 range->end = bo->notifier.interval_tree.last + 1; 812 - range->default_flags = hmm_range_flags[HMM_PFN_VALID]; 821 + range->default_flags = HMM_PFN_REQ_FAULT; 813 822 if (!amdgpu_ttm_tt_is_readonly(ttm)) 814 - range->default_flags |= range->flags[HMM_PFN_WRITE]; 823 + range->default_flags |= HMM_PFN_REQ_WRITE; 815 824 816 - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), 817 - GFP_KERNEL); 818 - if (unlikely(!range->pfns)) { 825 + range->hmm_pfns = kvmalloc_array(ttm->num_pages, 826 + sizeof(*range->hmm_pfns), GFP_KERNEL); 827 + if (unlikely(!range->hmm_pfns)) { 819 828 r = -ENOMEM; 820 829 goto out_free_ranges; 821 830 } ··· 837 852 down_read(&mm->mmap_sem); 838 853 r = hmm_range_fault(range); 839 854 up_read(&mm->mmap_sem); 840 - if (unlikely(r <= 0)) { 855 + if (unlikely(r)) { 841 856 /* 842 857 * FIXME: This timeout should encompass the retry from 843 858 * mmu_interval_read_retry() as well. 844 859 */ 845 - if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) 860 + if (r == -EBUSY && !time_after(jiffies, timeout)) 846 861 goto retry; 847 862 goto out_free_pfns; 848 863 } 849 864 850 - for (i = 0; i < ttm->num_pages; i++) { 851 - /* FIXME: The pages cannot be touched outside the notifier_lock */ 852 - pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); 853 - if (unlikely(!pages[i])) { 854 - pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", 855 - i, range->pfns[i]); 856 - r = -ENOMEM; 857 - 858 - goto out_free_pfns; 859 - } 860 - } 865 + /* 866 + * Due to default_flags, all pages are HMM_PFN_VALID or 867 + * hmm_range_fault() fails. FIXME: The pages cannot be touched outside 868 + * the notifier_lock, and mmu_interval_read_retry() must be done first. 
869 + */ 870 + for (i = 0; i < ttm->num_pages; i++) 871 + pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); 861 872 862 873 gtt->range = range; 863 874 mmput(mm); ··· 863 882 out_unlock: 864 883 up_read(&mm->mmap_sem); 865 884 out_free_pfns: 866 - kvfree(range->pfns); 885 + kvfree(range->hmm_pfns); 867 886 out_free_ranges: 868 887 kfree(range); 869 888 out: ··· 888 907 DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", 889 908 gtt->userptr, ttm->num_pages); 890 909 891 - WARN_ONCE(!gtt->range || !gtt->range->pfns, 910 + WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, 892 911 "No user pages to check\n"); 893 912 894 913 if (gtt->range) { ··· 898 917 */ 899 918 r = mmu_interval_read_retry(gtt->range->notifier, 900 919 gtt->range->notifier_seq); 901 - kvfree(gtt->range->pfns); 920 + kvfree(gtt->range->hmm_pfns); 902 921 kfree(gtt->range); 903 922 gtt->range = NULL; 904 923 } ··· 989 1008 990 1009 for (i = 0; i < ttm->num_pages; i++) { 991 1010 if (ttm->pages[i] != 992 - hmm_device_entry_to_page(gtt->range, 993 - gtt->range->pfns[i])) 1011 + hmm_pfn_to_page(gtt->range->hmm_pfns[i])) 994 1012 break; 995 1013 } 996 1014
drivers/gpu/drm/nouveau/nouveau_dmem.c (+1 -26)
···
 	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
 }

-static unsigned long nouveau_dmem_page_addr(struct page *page)
+unsigned long nouveau_dmem_page_addr(struct page *page)
 {
 	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
 	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
···
 	kfree(args.src);
 out:
 	return ret;
-}
-
-void
-nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			 struct hmm_range *range)
-{
-	unsigned long i, npages;
-
-	npages = (range->end - range->start) >> PAGE_SHIFT;
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-		uint64_t addr;
-
-		page = hmm_device_entry_to_page(range, range->pfns[i]);
-		if (page == NULL)
-			continue;
-
-		if (!is_device_private_page(page))
-			continue;
-
-		addr = nouveau_dmem_page_addr(page);
-		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
-		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
-		range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
-	}
 }
drivers/gpu/drm/nouveau/nouveau_dmem.h (+1 -2)
···
 			     struct vm_area_struct *vma,
 			     unsigned long start,
 			     unsigned long end);
+unsigned long nouveau_dmem_page_addr(struct page *page);

-void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			      struct hmm_range *range);
 #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
 static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}
drivers/gpu/drm/nouveau/nouveau_svm.c (+62 -32)
··· 369 369 return ret; 370 370 } 371 371 372 - static const u64 373 - nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { 374 - [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, 375 - [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, 376 - }; 377 - 378 - static const u64 379 - nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = { 380 - [HMM_PFN_ERROR ] = ~NVIF_VMM_PFNMAP_V0_V, 381 - [HMM_PFN_NONE ] = NVIF_VMM_PFNMAP_V0_NONE, 382 - [HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V, 383 - }; 384 - 385 372 /* Issue fault replay for GPU to retry accesses that faulted previously. */ 386 373 static void 387 374 nouveau_svm_fault_replay(struct nouveau_svm *svm) ··· 506 519 .invalidate = nouveau_svm_range_invalidate, 507 520 }; 508 521 522 + static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, 523 + struct hmm_range *range, u64 *ioctl_addr) 524 + { 525 + unsigned long i, npages; 526 + 527 + /* 528 + * The ioctl_addr prepared here is passed through nvif_object_ioctl() 529 + * to an eventual DMA map in something like gp100_vmm_pgt_pfn() 530 + * 531 + * This is all just encoding the internal hmm representation into a 532 + * different nouveau internal representation. 533 + */ 534 + npages = (range->end - range->start) >> PAGE_SHIFT; 535 + for (i = 0; i < npages; ++i) { 536 + struct page *page; 537 + 538 + if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) { 539 + ioctl_addr[i] = 0; 540 + continue; 541 + } 542 + 543 + page = hmm_pfn_to_page(range->hmm_pfns[i]); 544 + if (is_device_private_page(page)) 545 + ioctl_addr[i] = nouveau_dmem_page_addr(page) | 546 + NVIF_VMM_PFNMAP_V0_V | 547 + NVIF_VMM_PFNMAP_V0_VRAM; 548 + else 549 + ioctl_addr[i] = page_to_phys(page) | 550 + NVIF_VMM_PFNMAP_V0_V | 551 + NVIF_VMM_PFNMAP_V0_HOST; 552 + if (range->hmm_pfns[i] & HMM_PFN_WRITE) 553 + ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W; 554 + } 555 + } 556 + 509 557 static int nouveau_range_fault(struct nouveau_svmm *svmm, 510 558 struct nouveau_drm *drm, void *data, u32 size, 511 - u64 *pfns, struct svm_notifier *notifier) 559 + unsigned long hmm_pfns[], u64 *ioctl_addr, 560 + struct svm_notifier *notifier) 512 561 { 513 562 unsigned long timeout = 514 563 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); ··· 553 530 .notifier = &notifier->notifier, 554 531 .start = notifier->notifier.interval_tree.start, 555 532 .end = notifier->notifier.interval_tree.last + 1, 556 - .pfns = pfns, 557 - .flags = nouveau_svm_pfn_flags, 558 - .values = nouveau_svm_pfn_values, 559 - .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT, 533 + .pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE, 534 + .hmm_pfns = hmm_pfns, 560 535 }; 561 536 struct mm_struct *mm = notifier->notifier.mm; 562 - long ret; 537 + int ret; 563 538 564 539 while (true) { 565 540 if (time_after(jiffies, timeout)) 566 541 return -EBUSY; 567 542 568 543 range.notifier_seq = mmu_interval_read_begin(range.notifier); 569 - range.default_flags = 0; 570 - range.pfn_flags_mask = -1UL; 571 544 down_read(&mm->mmap_sem); 572 545 ret = hmm_range_fault(&range); 573 546 up_read(&mm->mmap_sem); 574 - if (ret <= 0) { 575 - if (ret == 0 || ret == -EBUSY) 547 + if (ret) { 548 + /* 549 + * FIXME: the input PFN_REQ flags are destroyed on 550 + * -EBUSY, we need to regenerate them, also for the 551 + * other continue below 552 + */ 553 + if (ret == -EBUSY) 576 554 continue; 577 555 return ret; 578 556 } ··· 587 563 break; 588 564 } 589 565 590 - nouveau_dmem_convert_pfn(drm, &range); 566 + nouveau_hmm_convert_pfn(drm, &range, ioctl_addr); 591 567 592 568 svmm->vmm->vmm.object.client->super = true; 593 569 ret = 
nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); ··· 614 590 } i; 615 591 u64 phys[16]; 616 592 } args; 593 + unsigned long hmm_pfns[ARRAY_SIZE(args.phys)]; 617 594 struct vm_area_struct *vma; 618 595 u64 inst, start, limit; 619 596 int fi, fn, pi, fill; ··· 730 705 * access flags. 731 706 *XXX: atomic? 732 707 */ 733 - if (buffer->fault[fn]->access != 0 /* READ. */ && 734 - buffer->fault[fn]->access != 3 /* PREFETCH. */) { 735 - args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V | 736 - NVIF_VMM_PFNMAP_V0_W; 737 - } else { 738 - args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V; 708 + switch (buffer->fault[fn]->access) { 709 + case 0: /* READ. */ 710 + hmm_pfns[pi++] = HMM_PFN_REQ_FAULT; 711 + break; 712 + case 3: /* PREFETCH. */ 713 + hmm_pfns[pi++] = 0; 714 + break; 715 + default: 716 + hmm_pfns[pi++] = HMM_PFN_REQ_FAULT | 717 + HMM_PFN_REQ_WRITE; 718 + break; 739 719 } 740 720 args.i.p.size = pi << PAGE_SHIFT; 741 721 ··· 768 738 fill = (buffer->fault[fn ]->addr - 769 739 buffer->fault[fn - 1]->addr) >> PAGE_SHIFT; 770 740 while (--fill) 771 - args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE; 741 + hmm_pfns[pi++] = 0; 772 742 } 773 743 774 744 SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)", ··· 784 754 ret = nouveau_range_fault( 785 755 svmm, svm->drm, &args, 786 756 sizeof(args.i) + pi * sizeof(args.phys[0]), 787 - args.phys, &notifier); 757 + hmm_pfns, args.phys, &notifier); 788 758 mmu_interval_notifier_remove(&notifier.notifier); 789 759 } 790 760 mmput(mm);
include/linux/hmm.h (+39 -72)
···
 #include <linux/mmu_notifier.h>

 /*
- * hmm_pfn_flag_e - HMM flag enums
+ * On output:
+ * 0             - The page is faultable and a future call with
+ *                 HMM_PFN_REQ_FAULT could succeed.
+ * HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
+ *                 least readable. If dev_private_owner is !NULL then this could
+ *                 point at a DEVICE_PRIVATE page.
+ * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
+ * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
+ *                 fail. ie poisoned memory, special pages, no vma, etc
  *
- * Flags:
- * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
- * HMM_PFN_WRITE: CPU page table has write permission set
- *
- * The driver provides a flags array for mapping page protections to device
- * PTE bits. If the driver valid bit for an entry is bit 3,
- * i.e., (entry & (1 << 3)), then the driver must provide
- * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
- * Same logic apply to all flags. This is the same idea as vm_page_prot in vma
- * except that this is per device driver rather than per architecture.
+ * On input:
+ * 0                 - Return the current state of the page, do not fault it.
+ * HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
+ *                     will fail
+ * HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
+ *                     will fail. Must be combined with HMM_PFN_REQ_FAULT.
  */
-enum hmm_pfn_flag_e {
-	HMM_PFN_VALID = 0,
-	HMM_PFN_WRITE,
-	HMM_PFN_FLAG_MAX
+enum hmm_pfn_flags {
+	/* Output flags */
+	HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
+	HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
+	HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
+
+	/* Input flags */
+	HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
+	HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
+
+	HMM_PFN_FLAGS = HMM_PFN_VALID | HMM_PFN_WRITE | HMM_PFN_ERROR,
 };

 /*
- * hmm_pfn_value_e - HMM pfn special value
+ * hmm_pfn_to_page() - return struct page pointed to by a device entry
  *
- * Flags:
- * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
- * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- *   result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
- *   be mirrored by a device, because the entry will never have HMM_PFN_VALID
- *   set and the pfn value is undefined.
- *
- * Driver provides values for none entry, error entry, and special entry.
- * Driver can alias (i.e., use same value) error and special, but
- * it should not alias none with error or special.
- *
- * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
- * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
- * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
- * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
+ * This must be called under the caller 'user_lock' after a successful
+ * mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
+ * already.
  */
-enum hmm_pfn_value_e {
-	HMM_PFN_ERROR,
-	HMM_PFN_NONE,
-	HMM_PFN_SPECIAL,
-	HMM_PFN_VALUE_MAX
-};
+static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
+{
+	return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
+}

 /*
  * struct hmm_range - track invalidation lock on virtual address range
···
  * @notifier_seq: result of mmu_interval_read_begin()
  * @start: range virtual start address (inclusive)
  * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @flags: pfn flags to match device driver page table
- * @values: pfn value for some special case (none, special, error, ...)
+ * @hmm_pfns: array of pfns (big enough for the range)
  * @default_flags: default flags for the range (write, read, ... see hmm doc)
  * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
- * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
  * @dev_private_owner: owner of device private pages
  */
 struct hmm_range {
···
 	unsigned long		notifier_seq;
 	unsigned long		start;
 	unsigned long		end;
-	uint64_t		*pfns;
-	const uint64_t		*flags;
-	const uint64_t		*values;
-	uint64_t		default_flags;
-	uint64_t		pfn_flags_mask;
-	uint8_t			pfn_shift;
+	unsigned long		*hmm_pfns;
+	unsigned long		default_flags;
+	unsigned long		pfn_flags_mask;
 	void			*dev_private_owner;
 };

 /*
- * hmm_device_entry_to_page() - return struct page pointed to by a device entry
- * @range: range use to decode device entry value
- * @entry: device entry value to get corresponding struct page from
- * Return: struct page pointer if entry is a valid, NULL otherwise
- *
- * If the device entry is valid (ie valid flag set) then return the struct page
- * matching the entry value. Otherwise return NULL.
- */
-static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range,
-						    uint64_t entry)
-{
-	if (entry == range->values[HMM_PFN_NONE])
-		return NULL;
-	if (entry == range->values[HMM_PFN_ERROR])
-		return NULL;
-	if (entry == range->values[HMM_PFN_SPECIAL])
-		return NULL;
-	if (!(entry & range->flags[HMM_PFN_VALID]))
-		return NULL;
-	return pfn_to_page(entry >> range->pfn_shift);
-}
-
-/*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-long hmm_range_fault(struct hmm_range *range);
+int hmm_range_fault(struct hmm_range *range);

 /*
  * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
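Since the flags now live in the top bits of each unsigned long entry (bits 63/62/61 on a 64-bit kernel) with the PFN in the remaining low bits, decoding an output entry no longer needs the per-driver flags/values tables. A hedged sketch of what a consumer does, assuming a successful hmm_range_fault() and a held notifier lock:

    #include <linux/hmm.h>

    /* Illustration only: decode one range->hmm_pfns[] entry. */
    static struct page *decode_hmm_entry(unsigned long entry, bool *writable)
    {
            if (!(entry & HMM_PFN_VALID))
                    return NULL;            /* unpopulated (0) or HMM_PFN_ERROR */
            *writable = !!(entry & HMM_PFN_WRITE);
            return hmm_pfn_to_page(entry);  /* pfn_to_page(entry & ~HMM_PFN_FLAGS) */
    }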
lib/Kconfig.debug (+13)
···
 
 	  If unsure, say N.
 
+config TEST_HMM
+	tristate "Test HMM (Heterogeneous Memory Management)"
+	depends on TRANSPARENT_HUGEPAGE
+	depends on DEVICE_PRIVATE
+	select HMM_MIRROR
+	select MMU_NOTIFIER
+	help
+	  This is a pseudo device driver solely for testing HMM.
+	  Say M here if you want to build the HMM test module.
+	  Doing so will allow you to run tools/testing/selftest/vm/hmm-tests.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU
 
 config MEMTEST
lib/Makefile (+1)
···
 obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
 obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
 obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
+obj-$(CONFIG_TEST_HMM) += test_hmm.o
 
 obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
 
lib/test_hmm.c (new file, +1164)
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * This is a module to test the HMM (Heterogeneous Memory Management) 4 + * mirror and zone device private memory migration APIs of the kernel. 5 + * Userspace programs can register with the driver to mirror their own address 6 + * space and can use the device to read/write any valid virtual address. 7 + */ 8 + #include <linux/init.h> 9 + #include <linux/fs.h> 10 + #include <linux/mm.h> 11 + #include <linux/module.h> 12 + #include <linux/kernel.h> 13 + #include <linux/cdev.h> 14 + #include <linux/device.h> 15 + #include <linux/mutex.h> 16 + #include <linux/rwsem.h> 17 + #include <linux/sched.h> 18 + #include <linux/slab.h> 19 + #include <linux/highmem.h> 20 + #include <linux/delay.h> 21 + #include <linux/pagemap.h> 22 + #include <linux/hmm.h> 23 + #include <linux/vmalloc.h> 24 + #include <linux/swap.h> 25 + #include <linux/swapops.h> 26 + #include <linux/sched/mm.h> 27 + #include <linux/platform_device.h> 28 + 29 + #include "test_hmm_uapi.h" 30 + 31 + #define DMIRROR_NDEVICES 2 32 + #define DMIRROR_RANGE_FAULT_TIMEOUT 1000 33 + #define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U) 34 + #define DEVMEM_CHUNKS_RESERVE 16 35 + 36 + static const struct dev_pagemap_ops dmirror_devmem_ops; 37 + static const struct mmu_interval_notifier_ops dmirror_min_ops; 38 + static dev_t dmirror_dev; 39 + static struct page *dmirror_zero_page; 40 + 41 + struct dmirror_device; 42 + 43 + struct dmirror_bounce { 44 + void *ptr; 45 + unsigned long size; 46 + unsigned long addr; 47 + unsigned long cpages; 48 + }; 49 + 50 + #define DPT_XA_TAG_WRITE 3UL 51 + 52 + /* 53 + * Data structure to track address ranges and register for mmu interval 54 + * notifier updates. 55 + */ 56 + struct dmirror_interval { 57 + struct mmu_interval_notifier notifier; 58 + struct dmirror *dmirror; 59 + }; 60 + 61 + /* 62 + * Data attached to the open device file. 63 + * Note that it might be shared after a fork(). 64 + */ 65 + struct dmirror { 66 + struct dmirror_device *mdevice; 67 + struct xarray pt; 68 + struct mmu_interval_notifier notifier; 69 + struct mutex mutex; 70 + }; 71 + 72 + /* 73 + * ZONE_DEVICE pages for migration and simulating device memory. 74 + */ 75 + struct dmirror_chunk { 76 + struct dev_pagemap pagemap; 77 + struct dmirror_device *mdevice; 78 + }; 79 + 80 + /* 81 + * Per device data. 
82 + */ 83 + struct dmirror_device { 84 + struct cdev cdevice; 85 + struct hmm_devmem *devmem; 86 + 87 + unsigned int devmem_capacity; 88 + unsigned int devmem_count; 89 + struct dmirror_chunk **devmem_chunks; 90 + struct mutex devmem_lock; /* protects the above */ 91 + 92 + unsigned long calloc; 93 + unsigned long cfree; 94 + struct page *free_pages; 95 + spinlock_t lock; /* protects the above */ 96 + }; 97 + 98 + static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES]; 99 + 100 + static int dmirror_bounce_init(struct dmirror_bounce *bounce, 101 + unsigned long addr, 102 + unsigned long size) 103 + { 104 + bounce->addr = addr; 105 + bounce->size = size; 106 + bounce->cpages = 0; 107 + bounce->ptr = vmalloc(size); 108 + if (!bounce->ptr) 109 + return -ENOMEM; 110 + return 0; 111 + } 112 + 113 + static void dmirror_bounce_fini(struct dmirror_bounce *bounce) 114 + { 115 + vfree(bounce->ptr); 116 + } 117 + 118 + static int dmirror_fops_open(struct inode *inode, struct file *filp) 119 + { 120 + struct cdev *cdev = inode->i_cdev; 121 + struct dmirror *dmirror; 122 + int ret; 123 + 124 + /* Mirror this process address space */ 125 + dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL); 126 + if (dmirror == NULL) 127 + return -ENOMEM; 128 + 129 + dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice); 130 + mutex_init(&dmirror->mutex); 131 + xa_init(&dmirror->pt); 132 + 133 + ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm, 134 + 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops); 135 + if (ret) { 136 + kfree(dmirror); 137 + return ret; 138 + } 139 + 140 + filp->private_data = dmirror; 141 + return 0; 142 + } 143 + 144 + static int dmirror_fops_release(struct inode *inode, struct file *filp) 145 + { 146 + struct dmirror *dmirror = filp->private_data; 147 + 148 + mmu_interval_notifier_remove(&dmirror->notifier); 149 + xa_destroy(&dmirror->pt); 150 + kfree(dmirror); 151 + return 0; 152 + } 153 + 154 + static struct dmirror_device *dmirror_page_to_device(struct page *page) 155 + 156 + { 157 + return container_of(page->pgmap, struct dmirror_chunk, 158 + pagemap)->mdevice; 159 + } 160 + 161 + static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range) 162 + { 163 + unsigned long *pfns = range->hmm_pfns; 164 + unsigned long pfn; 165 + 166 + for (pfn = (range->start >> PAGE_SHIFT); 167 + pfn < (range->end >> PAGE_SHIFT); 168 + pfn++, pfns++) { 169 + struct page *page; 170 + void *entry; 171 + 172 + /* 173 + * Since we asked for hmm_range_fault() to populate pages, 174 + * it shouldn't return an error entry on success. 175 + */ 176 + WARN_ON(*pfns & HMM_PFN_ERROR); 177 + WARN_ON(!(*pfns & HMM_PFN_VALID)); 178 + 179 + page = hmm_pfn_to_page(*pfns); 180 + WARN_ON(!page); 181 + 182 + entry = page; 183 + if (*pfns & HMM_PFN_WRITE) 184 + entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); 185 + else if (WARN_ON(range->default_flags & HMM_PFN_WRITE)) 186 + return -EFAULT; 187 + entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); 188 + if (xa_is_err(entry)) 189 + return xa_err(entry); 190 + } 191 + 192 + return 0; 193 + } 194 + 195 + static void dmirror_do_update(struct dmirror *dmirror, unsigned long start, 196 + unsigned long end) 197 + { 198 + unsigned long pfn; 199 + void *entry; 200 + 201 + /* 202 + * The XArray doesn't hold references to pages since it relies on 203 + * the mmu notifier to clear page pointers when they become stale. 204 + * Therefore, it is OK to just clear the entry. 
205 + */ 206 + xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT, 207 + end >> PAGE_SHIFT) 208 + xa_erase(&dmirror->pt, pfn); 209 + } 210 + 211 + static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni, 212 + const struct mmu_notifier_range *range, 213 + unsigned long cur_seq) 214 + { 215 + struct dmirror *dmirror = container_of(mni, struct dmirror, notifier); 216 + 217 + if (mmu_notifier_range_blockable(range)) 218 + mutex_lock(&dmirror->mutex); 219 + else if (!mutex_trylock(&dmirror->mutex)) 220 + return false; 221 + 222 + mmu_interval_set_seq(mni, cur_seq); 223 + dmirror_do_update(dmirror, range->start, range->end); 224 + 225 + mutex_unlock(&dmirror->mutex); 226 + return true; 227 + } 228 + 229 + static const struct mmu_interval_notifier_ops dmirror_min_ops = { 230 + .invalidate = dmirror_interval_invalidate, 231 + }; 232 + 233 + static int dmirror_range_fault(struct dmirror *dmirror, 234 + struct hmm_range *range) 235 + { 236 + struct mm_struct *mm = dmirror->notifier.mm; 237 + unsigned long timeout = 238 + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 239 + int ret; 240 + 241 + while (true) { 242 + if (time_after(jiffies, timeout)) { 243 + ret = -EBUSY; 244 + goto out; 245 + } 246 + 247 + range->notifier_seq = mmu_interval_read_begin(range->notifier); 248 + down_read(&mm->mmap_sem); 249 + ret = hmm_range_fault(range); 250 + up_read(&mm->mmap_sem); 251 + if (ret) { 252 + if (ret == -EBUSY) 253 + continue; 254 + goto out; 255 + } 256 + 257 + mutex_lock(&dmirror->mutex); 258 + if (mmu_interval_read_retry(range->notifier, 259 + range->notifier_seq)) { 260 + mutex_unlock(&dmirror->mutex); 261 + continue; 262 + } 263 + break; 264 + } 265 + 266 + ret = dmirror_do_fault(dmirror, range); 267 + 268 + mutex_unlock(&dmirror->mutex); 269 + out: 270 + return ret; 271 + } 272 + 273 + static int dmirror_fault(struct dmirror *dmirror, unsigned long start, 274 + unsigned long end, bool write) 275 + { 276 + struct mm_struct *mm = dmirror->notifier.mm; 277 + unsigned long addr; 278 + unsigned long pfns[64]; 279 + struct hmm_range range = { 280 + .notifier = &dmirror->notifier, 281 + .hmm_pfns = pfns, 282 + .pfn_flags_mask = 0, 283 + .default_flags = 284 + HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0), 285 + .dev_private_owner = dmirror->mdevice, 286 + }; 287 + int ret = 0; 288 + 289 + /* Since the mm is for the mirrored process, get a reference first. 
*/ 290 + if (!mmget_not_zero(mm)) 291 + return 0; 292 + 293 + for (addr = start; addr < end; addr = range.end) { 294 + range.start = addr; 295 + range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end); 296 + 297 + ret = dmirror_range_fault(dmirror, &range); 298 + if (ret) 299 + break; 300 + } 301 + 302 + mmput(mm); 303 + return ret; 304 + } 305 + 306 + static int dmirror_do_read(struct dmirror *dmirror, unsigned long start, 307 + unsigned long end, struct dmirror_bounce *bounce) 308 + { 309 + unsigned long pfn; 310 + void *ptr; 311 + 312 + ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK); 313 + 314 + for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) { 315 + void *entry; 316 + struct page *page; 317 + void *tmp; 318 + 319 + entry = xa_load(&dmirror->pt, pfn); 320 + page = xa_untag_pointer(entry); 321 + if (!page) 322 + return -ENOENT; 323 + 324 + tmp = kmap(page); 325 + memcpy(ptr, tmp, PAGE_SIZE); 326 + kunmap(page); 327 + 328 + ptr += PAGE_SIZE; 329 + bounce->cpages++; 330 + } 331 + 332 + return 0; 333 + } 334 + 335 + static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) 336 + { 337 + struct dmirror_bounce bounce; 338 + unsigned long start, end; 339 + unsigned long size = cmd->npages << PAGE_SHIFT; 340 + int ret; 341 + 342 + start = cmd->addr; 343 + end = start + size; 344 + if (end < start) 345 + return -EINVAL; 346 + 347 + ret = dmirror_bounce_init(&bounce, start, size); 348 + if (ret) 349 + return ret; 350 + 351 + while (1) { 352 + mutex_lock(&dmirror->mutex); 353 + ret = dmirror_do_read(dmirror, start, end, &bounce); 354 + mutex_unlock(&dmirror->mutex); 355 + if (ret != -ENOENT) 356 + break; 357 + 358 + start = cmd->addr + (bounce.cpages << PAGE_SHIFT); 359 + ret = dmirror_fault(dmirror, start, end, false); 360 + if (ret) 361 + break; 362 + cmd->faults++; 363 + } 364 + 365 + if (ret == 0) { 366 + if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr, 367 + bounce.size)) 368 + ret = -EFAULT; 369 + } 370 + cmd->cpages = bounce.cpages; 371 + dmirror_bounce_fini(&bounce); 372 + return ret; 373 + } 374 + 375 + static int dmirror_do_write(struct dmirror *dmirror, unsigned long start, 376 + unsigned long end, struct dmirror_bounce *bounce) 377 + { 378 + unsigned long pfn; 379 + void *ptr; 380 + 381 + ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK); 382 + 383 + for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) { 384 + void *entry; 385 + struct page *page; 386 + void *tmp; 387 + 388 + entry = xa_load(&dmirror->pt, pfn); 389 + page = xa_untag_pointer(entry); 390 + if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE) 391 + return -ENOENT; 392 + 393 + tmp = kmap(page); 394 + memcpy(tmp, ptr, PAGE_SIZE); 395 + kunmap(page); 396 + 397 + ptr += PAGE_SIZE; 398 + bounce->cpages++; 399 + } 400 + 401 + return 0; 402 + } 403 + 404 + static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) 405 + { 406 + struct dmirror_bounce bounce; 407 + unsigned long start, end; 408 + unsigned long size = cmd->npages << PAGE_SHIFT; 409 + int ret; 410 + 411 + start = cmd->addr; 412 + end = start + size; 413 + if (end < start) 414 + return -EINVAL; 415 + 416 + ret = dmirror_bounce_init(&bounce, start, size); 417 + if (ret) 418 + return ret; 419 + if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr), 420 + bounce.size)) { 421 + ret = -EFAULT; 422 + goto fini; 423 + } 424 + 425 + while (1) { 426 + mutex_lock(&dmirror->mutex); 427 + ret = dmirror_do_write(dmirror, start, end, &bounce); 428 + 
mutex_unlock(&dmirror->mutex); 429 + if (ret != -ENOENT) 430 + break; 431 + 432 + start = cmd->addr + (bounce.cpages << PAGE_SHIFT); 433 + ret = dmirror_fault(dmirror, start, end, true); 434 + if (ret) 435 + break; 436 + cmd->faults++; 437 + } 438 + 439 + fini: 440 + cmd->cpages = bounce.cpages; 441 + dmirror_bounce_fini(&bounce); 442 + return ret; 443 + } 444 + 445 + static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, 446 + struct page **ppage) 447 + { 448 + struct dmirror_chunk *devmem; 449 + struct resource *res; 450 + unsigned long pfn; 451 + unsigned long pfn_first; 452 + unsigned long pfn_last; 453 + void *ptr; 454 + 455 + mutex_lock(&mdevice->devmem_lock); 456 + 457 + if (mdevice->devmem_count == mdevice->devmem_capacity) { 458 + struct dmirror_chunk **new_chunks; 459 + unsigned int new_capacity; 460 + 461 + new_capacity = mdevice->devmem_capacity + 462 + DEVMEM_CHUNKS_RESERVE; 463 + new_chunks = krealloc(mdevice->devmem_chunks, 464 + sizeof(new_chunks[0]) * new_capacity, 465 + GFP_KERNEL); 466 + if (!new_chunks) 467 + goto err; 468 + mdevice->devmem_capacity = new_capacity; 469 + mdevice->devmem_chunks = new_chunks; 470 + } 471 + 472 + res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, 473 + "hmm_dmirror"); 474 + if (IS_ERR(res)) 475 + goto err; 476 + 477 + devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); 478 + if (!devmem) 479 + goto err_release; 480 + 481 + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; 482 + devmem->pagemap.res = *res; 483 + devmem->pagemap.ops = &dmirror_devmem_ops; 484 + devmem->pagemap.owner = mdevice; 485 + 486 + ptr = memremap_pages(&devmem->pagemap, numa_node_id()); 487 + if (IS_ERR(ptr)) 488 + goto err_free; 489 + 490 + devmem->mdevice = mdevice; 491 + pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT; 492 + pfn_last = pfn_first + 493 + (resource_size(&devmem->pagemap.res) >> PAGE_SHIFT); 494 + mdevice->devmem_chunks[mdevice->devmem_count++] = devmem; 495 + 496 + mutex_unlock(&mdevice->devmem_lock); 497 + 498 + pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n", 499 + DEVMEM_CHUNK_SIZE / (1024 * 1024), 500 + mdevice->devmem_count, 501 + mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)), 502 + pfn_first, pfn_last); 503 + 504 + spin_lock(&mdevice->lock); 505 + for (pfn = pfn_first; pfn < pfn_last; pfn++) { 506 + struct page *page = pfn_to_page(pfn); 507 + 508 + page->zone_device_data = mdevice->free_pages; 509 + mdevice->free_pages = page; 510 + } 511 + if (ppage) { 512 + *ppage = mdevice->free_pages; 513 + mdevice->free_pages = (*ppage)->zone_device_data; 514 + mdevice->calloc++; 515 + } 516 + spin_unlock(&mdevice->lock); 517 + 518 + return true; 519 + 520 + err_free: 521 + kfree(devmem); 522 + err_release: 523 + release_mem_region(devmem->pagemap.res.start, 524 + resource_size(&devmem->pagemap.res)); 525 + err: 526 + mutex_unlock(&mdevice->devmem_lock); 527 + return false; 528 + } 529 + 530 + static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) 531 + { 532 + struct page *dpage = NULL; 533 + struct page *rpage; 534 + 535 + /* 536 + * This is a fake device so we alloc real system memory to store 537 + * our device memory. 
538 + */ 539 + rpage = alloc_page(GFP_HIGHUSER); 540 + if (!rpage) 541 + return NULL; 542 + 543 + spin_lock(&mdevice->lock); 544 + 545 + if (mdevice->free_pages) { 546 + dpage = mdevice->free_pages; 547 + mdevice->free_pages = dpage->zone_device_data; 548 + mdevice->calloc++; 549 + spin_unlock(&mdevice->lock); 550 + } else { 551 + spin_unlock(&mdevice->lock); 552 + if (!dmirror_allocate_chunk(mdevice, &dpage)) 553 + goto error; 554 + } 555 + 556 + dpage->zone_device_data = rpage; 557 + get_page(dpage); 558 + lock_page(dpage); 559 + return dpage; 560 + 561 + error: 562 + __free_page(rpage); 563 + return NULL; 564 + } 565 + 566 + static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, 567 + struct dmirror *dmirror) 568 + { 569 + struct dmirror_device *mdevice = dmirror->mdevice; 570 + const unsigned long *src = args->src; 571 + unsigned long *dst = args->dst; 572 + unsigned long addr; 573 + 574 + for (addr = args->start; addr < args->end; addr += PAGE_SIZE, 575 + src++, dst++) { 576 + struct page *spage; 577 + struct page *dpage; 578 + struct page *rpage; 579 + 580 + if (!(*src & MIGRATE_PFN_MIGRATE)) 581 + continue; 582 + 583 + /* 584 + * Note that spage might be NULL which is OK since it is an 585 + * unallocated pte_none() or read-only zero page. 586 + */ 587 + spage = migrate_pfn_to_page(*src); 588 + 589 + /* 590 + * Don't migrate device private pages from our own driver or 591 + * others. For our own we would do a device private memory copy 592 + * not a migration and for others, we would need to fault the 593 + * other device's page into system memory first. 594 + */ 595 + if (spage && is_zone_device_page(spage)) 596 + continue; 597 + 598 + dpage = dmirror_devmem_alloc_page(mdevice); 599 + if (!dpage) 600 + continue; 601 + 602 + rpage = dpage->zone_device_data; 603 + if (spage) 604 + copy_highpage(rpage, spage); 605 + else 606 + clear_highpage(rpage); 607 + 608 + /* 609 + * Normally, a device would use the page->zone_device_data to 610 + * point to the mirror but here we use it to hold the page for 611 + * the simulated device memory and that page holds the pointer 612 + * to the mirror. 613 + */ 614 + rpage->zone_device_data = dmirror; 615 + 616 + *dst = migrate_pfn(page_to_pfn(dpage)) | 617 + MIGRATE_PFN_LOCKED; 618 + if ((*src & MIGRATE_PFN_WRITE) || 619 + (!spage && args->vma->vm_flags & VM_WRITE)) 620 + *dst |= MIGRATE_PFN_WRITE; 621 + } 622 + } 623 + 624 + static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, 625 + struct dmirror *dmirror) 626 + { 627 + unsigned long start = args->start; 628 + unsigned long end = args->end; 629 + const unsigned long *src = args->src; 630 + const unsigned long *dst = args->dst; 631 + unsigned long pfn; 632 + 633 + /* Map the migrated pages into the device's page tables. */ 634 + mutex_lock(&dmirror->mutex); 635 + 636 + for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, 637 + src++, dst++) { 638 + struct page *dpage; 639 + void *entry; 640 + 641 + if (!(*src & MIGRATE_PFN_MIGRATE)) 642 + continue; 643 + 644 + dpage = migrate_pfn_to_page(*dst); 645 + if (!dpage) 646 + continue; 647 + 648 + /* 649 + * Store the page that holds the data so the page table 650 + * doesn't have to deal with ZONE_DEVICE private pages. 
651 + */ 652 + entry = dpage->zone_device_data; 653 + if (*dst & MIGRATE_PFN_WRITE) 654 + entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); 655 + entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); 656 + if (xa_is_err(entry)) { 657 + mutex_unlock(&dmirror->mutex); 658 + return xa_err(entry); 659 + } 660 + } 661 + 662 + mutex_unlock(&dmirror->mutex); 663 + return 0; 664 + } 665 + 666 + static int dmirror_migrate(struct dmirror *dmirror, 667 + struct hmm_dmirror_cmd *cmd) 668 + { 669 + unsigned long start, end, addr; 670 + unsigned long size = cmd->npages << PAGE_SHIFT; 671 + struct mm_struct *mm = dmirror->notifier.mm; 672 + struct vm_area_struct *vma; 673 + unsigned long src_pfns[64]; 674 + unsigned long dst_pfns[64]; 675 + struct dmirror_bounce bounce; 676 + struct migrate_vma args; 677 + unsigned long next; 678 + int ret; 679 + 680 + start = cmd->addr; 681 + end = start + size; 682 + if (end < start) 683 + return -EINVAL; 684 + 685 + /* Since the mm is for the mirrored process, get a reference first. */ 686 + if (!mmget_not_zero(mm)) 687 + return -EINVAL; 688 + 689 + down_read(&mm->mmap_sem); 690 + for (addr = start; addr < end; addr = next) { 691 + vma = find_vma(mm, addr); 692 + if (!vma || addr < vma->vm_start || 693 + !(vma->vm_flags & VM_READ)) { 694 + ret = -EINVAL; 695 + goto out; 696 + } 697 + next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT)); 698 + if (next > vma->vm_end) 699 + next = vma->vm_end; 700 + 701 + args.vma = vma; 702 + args.src = src_pfns; 703 + args.dst = dst_pfns; 704 + args.start = addr; 705 + args.end = next; 706 + args.src_owner = NULL; 707 + ret = migrate_vma_setup(&args); 708 + if (ret) 709 + goto out; 710 + 711 + dmirror_migrate_alloc_and_copy(&args, dmirror); 712 + migrate_vma_pages(&args); 713 + dmirror_migrate_finalize_and_map(&args, dmirror); 714 + migrate_vma_finalize(&args); 715 + } 716 + up_read(&mm->mmap_sem); 717 + mmput(mm); 718 + 719 + /* Return the migrated data for verification. */ 720 + ret = dmirror_bounce_init(&bounce, start, size); 721 + if (ret) 722 + return ret; 723 + mutex_lock(&dmirror->mutex); 724 + ret = dmirror_do_read(dmirror, start, end, &bounce); 725 + mutex_unlock(&dmirror->mutex); 726 + if (ret == 0) { 727 + if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr, 728 + bounce.size)) 729 + ret = -EFAULT; 730 + } 731 + cmd->cpages = bounce.cpages; 732 + dmirror_bounce_fini(&bounce); 733 + return ret; 734 + 735 + out: 736 + up_read(&mm->mmap_sem); 737 + mmput(mm); 738 + return ret; 739 + } 740 + 741 + static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, 742 + unsigned char *perm, unsigned long entry) 743 + { 744 + struct page *page; 745 + 746 + if (entry & HMM_PFN_ERROR) { 747 + *perm = HMM_DMIRROR_PROT_ERROR; 748 + return; 749 + } 750 + if (!(entry & HMM_PFN_VALID)) { 751 + *perm = HMM_DMIRROR_PROT_NONE; 752 + return; 753 + } 754 + 755 + page = hmm_pfn_to_page(entry); 756 + if (is_device_private_page(page)) { 757 + /* Is the page migrated to this device or some other? 
*/ 758 + if (dmirror->mdevice == dmirror_page_to_device(page)) 759 + *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL; 760 + else 761 + *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE; 762 + } else if (is_zero_pfn(page_to_pfn(page))) 763 + *perm = HMM_DMIRROR_PROT_ZERO; 764 + else 765 + *perm = HMM_DMIRROR_PROT_NONE; 766 + if (entry & HMM_PFN_WRITE) 767 + *perm |= HMM_DMIRROR_PROT_WRITE; 768 + else 769 + *perm |= HMM_DMIRROR_PROT_READ; 770 + } 771 + 772 + static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni, 773 + const struct mmu_notifier_range *range, 774 + unsigned long cur_seq) 775 + { 776 + struct dmirror_interval *dmi = 777 + container_of(mni, struct dmirror_interval, notifier); 778 + struct dmirror *dmirror = dmi->dmirror; 779 + 780 + if (mmu_notifier_range_blockable(range)) 781 + mutex_lock(&dmirror->mutex); 782 + else if (!mutex_trylock(&dmirror->mutex)) 783 + return false; 784 + 785 + /* 786 + * Snapshots only need to set the sequence number since any 787 + * invalidation in the interval invalidates the whole snapshot. 788 + */ 789 + mmu_interval_set_seq(mni, cur_seq); 790 + 791 + mutex_unlock(&dmirror->mutex); 792 + return true; 793 + } 794 + 795 + static const struct mmu_interval_notifier_ops dmirror_mrn_ops = { 796 + .invalidate = dmirror_snapshot_invalidate, 797 + }; 798 + 799 + static int dmirror_range_snapshot(struct dmirror *dmirror, 800 + struct hmm_range *range, 801 + unsigned char *perm) 802 + { 803 + struct mm_struct *mm = dmirror->notifier.mm; 804 + struct dmirror_interval notifier; 805 + unsigned long timeout = 806 + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 807 + unsigned long i; 808 + unsigned long n; 809 + int ret = 0; 810 + 811 + notifier.dmirror = dmirror; 812 + range->notifier = &notifier.notifier; 813 + 814 + ret = mmu_interval_notifier_insert(range->notifier, mm, 815 + range->start, range->end - range->start, 816 + &dmirror_mrn_ops); 817 + if (ret) 818 + return ret; 819 + 820 + while (true) { 821 + if (time_after(jiffies, timeout)) { 822 + ret = -EBUSY; 823 + goto out; 824 + } 825 + 826 + range->notifier_seq = mmu_interval_read_begin(range->notifier); 827 + 828 + down_read(&mm->mmap_sem); 829 + ret = hmm_range_fault(range); 830 + up_read(&mm->mmap_sem); 831 + if (ret) { 832 + if (ret == -EBUSY) 833 + continue; 834 + goto out; 835 + } 836 + 837 + mutex_lock(&dmirror->mutex); 838 + if (mmu_interval_read_retry(range->notifier, 839 + range->notifier_seq)) { 840 + mutex_unlock(&dmirror->mutex); 841 + continue; 842 + } 843 + break; 844 + } 845 + 846 + n = (range->end - range->start) >> PAGE_SHIFT; 847 + for (i = 0; i < n; i++) 848 + dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]); 849 + 850 + mutex_unlock(&dmirror->mutex); 851 + out: 852 + mmu_interval_notifier_remove(range->notifier); 853 + return ret; 854 + } 855 + 856 + static int dmirror_snapshot(struct dmirror *dmirror, 857 + struct hmm_dmirror_cmd *cmd) 858 + { 859 + struct mm_struct *mm = dmirror->notifier.mm; 860 + unsigned long start, end; 861 + unsigned long size = cmd->npages << PAGE_SHIFT; 862 + unsigned long addr; 863 + unsigned long next; 864 + unsigned long pfns[64]; 865 + unsigned char perm[64]; 866 + char __user *uptr; 867 + struct hmm_range range = { 868 + .hmm_pfns = pfns, 869 + .dev_private_owner = dmirror->mdevice, 870 + }; 871 + int ret = 0; 872 + 873 + start = cmd->addr; 874 + end = start + size; 875 + if (end < start) 876 + return -EINVAL; 877 + 878 + /* Since the mm is for the mirrored process, get a reference first. 
*/ 879 + if (!mmget_not_zero(mm)) 880 + return -EINVAL; 881 + 882 + /* 883 + * Register a temporary notifier to detect invalidations even if it 884 + * overlaps with other mmu_interval_notifiers. 885 + */ 886 + uptr = u64_to_user_ptr(cmd->ptr); 887 + for (addr = start; addr < end; addr = next) { 888 + unsigned long n; 889 + 890 + next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end); 891 + range.start = addr; 892 + range.end = next; 893 + 894 + ret = dmirror_range_snapshot(dmirror, &range, perm); 895 + if (ret) 896 + break; 897 + 898 + n = (range.end - range.start) >> PAGE_SHIFT; 899 + if (copy_to_user(uptr, perm, n)) { 900 + ret = -EFAULT; 901 + break; 902 + } 903 + 904 + cmd->cpages += n; 905 + uptr += n; 906 + } 907 + mmput(mm); 908 + 909 + return ret; 910 + } 911 + 912 + static long dmirror_fops_unlocked_ioctl(struct file *filp, 913 + unsigned int command, 914 + unsigned long arg) 915 + { 916 + void __user *uarg = (void __user *)arg; 917 + struct hmm_dmirror_cmd cmd; 918 + struct dmirror *dmirror; 919 + int ret; 920 + 921 + dmirror = filp->private_data; 922 + if (!dmirror) 923 + return -EINVAL; 924 + 925 + if (copy_from_user(&cmd, uarg, sizeof(cmd))) 926 + return -EFAULT; 927 + 928 + if (cmd.addr & ~PAGE_MASK) 929 + return -EINVAL; 930 + if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT))) 931 + return -EINVAL; 932 + 933 + cmd.cpages = 0; 934 + cmd.faults = 0; 935 + 936 + switch (command) { 937 + case HMM_DMIRROR_READ: 938 + ret = dmirror_read(dmirror, &cmd); 939 + break; 940 + 941 + case HMM_DMIRROR_WRITE: 942 + ret = dmirror_write(dmirror, &cmd); 943 + break; 944 + 945 + case HMM_DMIRROR_MIGRATE: 946 + ret = dmirror_migrate(dmirror, &cmd); 947 + break; 948 + 949 + case HMM_DMIRROR_SNAPSHOT: 950 + ret = dmirror_snapshot(dmirror, &cmd); 951 + break; 952 + 953 + default: 954 + return -EINVAL; 955 + } 956 + if (ret) 957 + return ret; 958 + 959 + if (copy_to_user(uarg, &cmd, sizeof(cmd))) 960 + return -EFAULT; 961 + 962 + return 0; 963 + } 964 + 965 + static const struct file_operations dmirror_fops = { 966 + .open = dmirror_fops_open, 967 + .release = dmirror_fops_release, 968 + .unlocked_ioctl = dmirror_fops_unlocked_ioctl, 969 + .llseek = default_llseek, 970 + .owner = THIS_MODULE, 971 + }; 972 + 973 + static void dmirror_devmem_free(struct page *page) 974 + { 975 + struct page *rpage = page->zone_device_data; 976 + struct dmirror_device *mdevice; 977 + 978 + if (rpage) 979 + __free_page(rpage); 980 + 981 + mdevice = dmirror_page_to_device(page); 982 + 983 + spin_lock(&mdevice->lock); 984 + mdevice->cfree++; 985 + page->zone_device_data = mdevice->free_pages; 986 + mdevice->free_pages = page; 987 + spin_unlock(&mdevice->lock); 988 + } 989 + 990 + static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, 991 + struct dmirror_device *mdevice) 992 + { 993 + const unsigned long *src = args->src; 994 + unsigned long *dst = args->dst; 995 + unsigned long start = args->start; 996 + unsigned long end = args->end; 997 + unsigned long addr; 998 + 999 + for (addr = start; addr < end; addr += PAGE_SIZE, 1000 + src++, dst++) { 1001 + struct page *dpage, *spage; 1002 + 1003 + spage = migrate_pfn_to_page(*src); 1004 + if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) 1005 + continue; 1006 + spage = spage->zone_device_data; 1007 + 1008 + dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); 1009 + if (!dpage) 1010 + continue; 1011 + 1012 + lock_page(dpage); 1013 + copy_highpage(dpage, spage); 1014 + *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; 1015 + if 
(*src & MIGRATE_PFN_WRITE) 1016 + *dst |= MIGRATE_PFN_WRITE; 1017 + } 1018 + return 0; 1019 + } 1020 + 1021 + static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args, 1022 + struct dmirror *dmirror) 1023 + { 1024 + /* Invalidate the device's page table mapping. */ 1025 + mutex_lock(&dmirror->mutex); 1026 + dmirror_do_update(dmirror, args->start, args->end); 1027 + mutex_unlock(&dmirror->mutex); 1028 + } 1029 + 1030 + static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) 1031 + { 1032 + struct migrate_vma args; 1033 + unsigned long src_pfns; 1034 + unsigned long dst_pfns; 1035 + struct page *rpage; 1036 + struct dmirror *dmirror; 1037 + vm_fault_t ret; 1038 + 1039 + /* 1040 + * Normally, a device would use the page->zone_device_data to point to 1041 + * the mirror but here we use it to hold the page for the simulated 1042 + * device memory and that page holds the pointer to the mirror. 1043 + */ 1044 + rpage = vmf->page->zone_device_data; 1045 + dmirror = rpage->zone_device_data; 1046 + 1047 + /* FIXME demonstrate how we can adjust migrate range */ 1048 + args.vma = vmf->vma; 1049 + args.start = vmf->address; 1050 + args.end = args.start + PAGE_SIZE; 1051 + args.src = &src_pfns; 1052 + args.dst = &dst_pfns; 1053 + args.src_owner = dmirror->mdevice; 1054 + 1055 + if (migrate_vma_setup(&args)) 1056 + return VM_FAULT_SIGBUS; 1057 + 1058 + ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice); 1059 + if (ret) 1060 + return ret; 1061 + migrate_vma_pages(&args); 1062 + dmirror_devmem_fault_finalize_and_map(&args, dmirror); 1063 + migrate_vma_finalize(&args); 1064 + return 0; 1065 + } 1066 + 1067 + static const struct dev_pagemap_ops dmirror_devmem_ops = { 1068 + .page_free = dmirror_devmem_free, 1069 + .migrate_to_ram = dmirror_devmem_fault, 1070 + }; 1071 + 1072 + static int dmirror_device_init(struct dmirror_device *mdevice, int id) 1073 + { 1074 + dev_t dev; 1075 + int ret; 1076 + 1077 + dev = MKDEV(MAJOR(dmirror_dev), id); 1078 + mutex_init(&mdevice->devmem_lock); 1079 + spin_lock_init(&mdevice->lock); 1080 + 1081 + cdev_init(&mdevice->cdevice, &dmirror_fops); 1082 + mdevice->cdevice.owner = THIS_MODULE; 1083 + ret = cdev_add(&mdevice->cdevice, dev, 1); 1084 + if (ret) 1085 + return ret; 1086 + 1087 + /* Build a list of free ZONE_DEVICE private struct pages */ 1088 + dmirror_allocate_chunk(mdevice, NULL); 1089 + 1090 + return 0; 1091 + } 1092 + 1093 + static void dmirror_device_remove(struct dmirror_device *mdevice) 1094 + { 1095 + unsigned int i; 1096 + 1097 + if (mdevice->devmem_chunks) { 1098 + for (i = 0; i < mdevice->devmem_count; i++) { 1099 + struct dmirror_chunk *devmem = 1100 + mdevice->devmem_chunks[i]; 1101 + 1102 + memunmap_pages(&devmem->pagemap); 1103 + release_mem_region(devmem->pagemap.res.start, 1104 + resource_size(&devmem->pagemap.res)); 1105 + kfree(devmem); 1106 + } 1107 + kfree(mdevice->devmem_chunks); 1108 + } 1109 + 1110 + cdev_del(&mdevice->cdevice); 1111 + } 1112 + 1113 + static int __init hmm_dmirror_init(void) 1114 + { 1115 + int ret; 1116 + int id; 1117 + 1118 + ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES, 1119 + "HMM_DMIRROR"); 1120 + if (ret) 1121 + goto err_unreg; 1122 + 1123 + for (id = 0; id < DMIRROR_NDEVICES; id++) { 1124 + ret = dmirror_device_init(dmirror_devices + id, id); 1125 + if (ret) 1126 + goto err_chrdev; 1127 + } 1128 + 1129 + /* 1130 + * Allocate a zero page to simulate a reserved page of device private 1131 + * memory which is always zero. 
The zero_pfn page isn't used just to 1132 + * make the code here simpler (i.e., we need a struct page for it). 1133 + */ 1134 + dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); 1135 + if (!dmirror_zero_page) { 1136 + ret = -ENOMEM; 1137 + goto err_chrdev; 1138 + } 1139 + 1140 + pr_info("HMM test module loaded. This is only for testing HMM.\n"); 1141 + return 0; 1142 + 1143 + err_chrdev: 1144 + while (--id >= 0) 1145 + dmirror_device_remove(dmirror_devices + id); 1146 + unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); 1147 + err_unreg: 1148 + return ret; 1149 + } 1150 + 1151 + static void __exit hmm_dmirror_exit(void) 1152 + { 1153 + int id; 1154 + 1155 + if (dmirror_zero_page) 1156 + __free_page(dmirror_zero_page); 1157 + for (id = 0; id < DMIRROR_NDEVICES; id++) 1158 + dmirror_device_remove(dmirror_devices + id); 1159 + unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); 1160 + } 1161 + 1162 + module_init(hmm_dmirror_init); 1163 + module_exit(hmm_dmirror_exit); 1164 + MODULE_LICENSE("GPL");
lib/test_hmm_uapi.h (new file, +59)
···
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This is a module to test the HMM (Heterogeneous Memory Management) API
+ * of the kernel. It allows a userspace program to expose its entire address
+ * space through the HMM test module device file.
+ */
+#ifndef _LIB_TEST_HMM_UAPI_H
+#define _LIB_TEST_HMM_UAPI_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * Structure to pass to the HMM test driver to mimic a device accessing
+ * system memory and ZONE_DEVICE private memory through device page tables.
+ *
+ * @addr: (in) user address the device will read/write
+ * @ptr: (in) user address where device data is copied to/from
+ * @npages: (in) number of pages to read/write
+ * @cpages: (out) number of pages copied
+ * @faults: (out) number of device page faults seen
+ */
+struct hmm_dmirror_cmd {
+	__u64		addr;
+	__u64		ptr;
+	__u64		npages;
+	__u64		cpages;
+	__u64		faults;
+};
+
+/* Expose the address space of the calling process through hmm device file */
+#define HMM_DMIRROR_READ		_IOWR('H', 0x00, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
+
+/*
+ * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
+ * HMM_DMIRROR_PROT_ERROR: no valid mirror PTE for this page
+ * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access
+ * HMM_DMIRROR_PROT_READ: read-only PTE
+ * HMM_DMIRROR_PROT_WRITE: read/write PTE
+ * HMM_DMIRROR_PROT_ZERO: special read-only zero page
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the
+ *					device the ioctl() is made
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
+ *					other device
+ */
+enum {
+	HMM_DMIRROR_PROT_ERROR			= 0xFF,
+	HMM_DMIRROR_PROT_NONE			= 0x00,
+	HMM_DMIRROR_PROT_READ			= 0x01,
+	HMM_DMIRROR_PROT_WRITE			= 0x02,
+	HMM_DMIRROR_PROT_ZERO			= 0x10,
+	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
+	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
+};
+
+#endif /* _LIB_TEST_HMM_UAPI_H */
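The UAPI above is what the new selftests drive from userspace. A hedged sketch of a minimal caller, in the spirit of tools/testing/selftests/vm/hmm-tests (the /dev/hmm_dmirror0 node name is an assumption; the real selftest derives it from the registered character device):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #include "test_hmm_uapi.h"

    int main(void)
    {
            long psize = sysconf(_SC_PAGESIZE);
            unsigned long npages = 4;
            unsigned char *buf = aligned_alloc(psize, npages * psize); /* cmd.addr must be page aligned */
            unsigned char perm[4];                  /* one HMM_DMIRROR_PROT_* byte per page */
            struct hmm_dmirror_cmd cmd = { 0 };
            int fd;

            if (!buf)
                    return 1;
            memset(buf, 0xaa, npages * psize);      /* make the pages present and writable */

            fd = open("/dev/hmm_dmirror0", O_RDWR); /* assumed device node name */
            if (fd < 0)
                    return 1;

            cmd.addr = (uintptr_t)buf;              /* range to snapshot */
            cmd.ptr = (uintptr_t)perm;              /* where per-page protection bytes land */
            cmd.npages = npages;
            if (ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd) == 0)
                    printf("page 0 protection: %#x\n", perm[0]);

            close(fd);
            free(buf);
            return 0;
    }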
mm/hmm.c (+85 -100)
··· 37 37 HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT, 38 38 }; 39 39 40 - /* 41 - * hmm_device_entry_from_pfn() - create a valid device entry value from pfn 42 - * @range: range use to encode HMM pfn value 43 - * @pfn: pfn value for which to create the device entry 44 - * Return: valid device entry for the pfn 45 - */ 46 - static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, 47 - unsigned long pfn) 48 - { 49 - return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID]; 50 - } 51 - 52 40 static int hmm_pfns_fill(unsigned long addr, unsigned long end, 53 - struct hmm_range *range, enum hmm_pfn_value_e value) 41 + struct hmm_range *range, unsigned long cpu_flags) 54 42 { 55 - uint64_t *pfns = range->pfns; 56 - unsigned long i; 43 + unsigned long i = (addr - range->start) >> PAGE_SHIFT; 57 44 58 - i = (addr - range->start) >> PAGE_SHIFT; 59 45 for (; addr < end; addr += PAGE_SIZE, i++) 60 - pfns[i] = range->values[value]; 61 - 46 + range->hmm_pfns[i] = cpu_flags; 62 47 return 0; 63 48 } 64 49 ··· 81 96 } 82 97 83 98 static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, 84 - uint64_t pfns, uint64_t cpu_flags) 99 + unsigned long pfn_req_flags, 100 + unsigned long cpu_flags) 85 101 { 86 102 struct hmm_range *range = hmm_vma_walk->range; 87 103 ··· 96 110 * waste to have the user pre-fill the pfn arrays with a default 97 111 * flags value. 98 112 */ 99 - pfns = (pfns & range->pfn_flags_mask) | range->default_flags; 113 + pfn_req_flags &= range->pfn_flags_mask; 114 + pfn_req_flags |= range->default_flags; 100 115 101 116 /* We aren't ask to do anything ... */ 102 - if (!(pfns & range->flags[HMM_PFN_VALID])) 117 + if (!(pfn_req_flags & HMM_PFN_REQ_FAULT)) 103 118 return 0; 104 119 105 120 /* Need to write fault ? */ 106 - if ((pfns & range->flags[HMM_PFN_WRITE]) && 107 - !(cpu_flags & range->flags[HMM_PFN_WRITE])) 121 + if ((pfn_req_flags & HMM_PFN_REQ_WRITE) && 122 + !(cpu_flags & HMM_PFN_WRITE)) 108 123 return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT; 109 124 110 125 /* If CPU page table is not valid then we need to fault */ 111 - if (!(cpu_flags & range->flags[HMM_PFN_VALID])) 126 + if (!(cpu_flags & HMM_PFN_VALID)) 112 127 return HMM_NEED_FAULT; 113 128 return 0; 114 129 } 115 130 116 131 static unsigned int 117 132 hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk, 118 - const uint64_t *pfns, unsigned long npages, 119 - uint64_t cpu_flags) 133 + const unsigned long hmm_pfns[], unsigned long npages, 134 + unsigned long cpu_flags) 120 135 { 121 136 struct hmm_range *range = hmm_vma_walk->range; 122 137 unsigned int required_fault = 0; ··· 129 142 * hmm_pte_need_fault() will always return 0. 
130 143 */ 131 144 if (!((range->default_flags | range->pfn_flags_mask) & 132 - range->flags[HMM_PFN_VALID])) 145 + HMM_PFN_REQ_FAULT)) 133 146 return 0; 134 147 135 148 for (i = 0; i < npages; ++i) { 136 - required_fault |= 137 - hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags); 149 + required_fault |= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i], 150 + cpu_flags); 138 151 if (required_fault == HMM_NEED_ALL_BITS) 139 152 return required_fault; 140 153 } ··· 148 161 struct hmm_range *range = hmm_vma_walk->range; 149 162 unsigned int required_fault; 150 163 unsigned long i, npages; 151 - uint64_t *pfns; 164 + unsigned long *hmm_pfns; 152 165 153 166 i = (addr - range->start) >> PAGE_SHIFT; 154 167 npages = (end - addr) >> PAGE_SHIFT; 155 - pfns = &range->pfns[i]; 156 - required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0); 168 + hmm_pfns = &range->hmm_pfns[i]; 169 + required_fault = 170 + hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0); 157 171 if (!walk->vma) { 158 172 if (required_fault) 159 173 return -EFAULT; ··· 162 174 } 163 175 if (required_fault) 164 176 return hmm_vma_fault(addr, end, required_fault, walk); 165 - hmm_vma_walk->last = addr; 166 - return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE); 177 + return hmm_pfns_fill(addr, end, range, 0); 167 178 } 168 179 169 - static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd) 180 + static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range, 181 + pmd_t pmd) 170 182 { 171 183 if (pmd_protnone(pmd)) 172 184 return 0; 173 - return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] | 174 - range->flags[HMM_PFN_WRITE] : 175 - range->flags[HMM_PFN_VALID]; 185 + return pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID; 176 186 } 177 187 178 188 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 179 189 static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, 180 - unsigned long end, uint64_t *pfns, pmd_t pmd) 190 + unsigned long end, unsigned long hmm_pfns[], 191 + pmd_t pmd) 181 192 { 182 193 struct hmm_vma_walk *hmm_vma_walk = walk->private; 183 194 struct hmm_range *range = hmm_vma_walk->range; 184 195 unsigned long pfn, npages, i; 185 196 unsigned int required_fault; 186 - uint64_t cpu_flags; 197 + unsigned long cpu_flags; 187 198 188 199 npages = (end - addr) >> PAGE_SHIFT; 189 200 cpu_flags = pmd_to_hmm_pfn_flags(range, pmd); 190 201 required_fault = 191 - hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags); 202 + hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags); 192 203 if (required_fault) 193 204 return hmm_vma_fault(addr, end, required_fault, walk); 194 205 195 206 pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 196 207 for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) 197 - pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; 198 - hmm_vma_walk->last = end; 208 + hmm_pfns[i] = pfn | cpu_flags; 199 209 return 0; 200 210 } 201 211 #else /* CONFIG_TRANSPARENT_HUGEPAGE */ 202 212 /* stub to allow the code below to compile */ 203 213 int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, 204 - unsigned long end, uint64_t *pfns, pmd_t pmd); 214 + unsigned long end, unsigned long hmm_pfns[], pmd_t pmd); 205 215 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 206 216 207 217 static inline bool hmm_is_device_private_entry(struct hmm_range *range, ··· 210 224 range->dev_private_owner; 211 225 } 212 226 213 - static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) 227 + static inline unsigned long 
pte_to_hmm_pfn_flags(struct hmm_range *range, 228 + pte_t pte) 214 229 { 215 230 if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte)) 216 231 return 0; 217 - return pte_write(pte) ? range->flags[HMM_PFN_VALID] | 218 - range->flags[HMM_PFN_WRITE] : 219 - range->flags[HMM_PFN_VALID]; 232 + return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID; 220 233 } 221 234 222 235 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, 223 236 unsigned long end, pmd_t *pmdp, pte_t *ptep, 224 - uint64_t *pfn) 237 + unsigned long *hmm_pfn) 225 238 { 226 239 struct hmm_vma_walk *hmm_vma_walk = walk->private; 227 240 struct hmm_range *range = hmm_vma_walk->range; 228 241 unsigned int required_fault; 229 - uint64_t cpu_flags; 242 + unsigned long cpu_flags; 230 243 pte_t pte = *ptep; 231 - uint64_t orig_pfn = *pfn; 244 + uint64_t pfn_req_flags = *hmm_pfn; 232 245 233 246 if (pte_none(pte)) { 234 - required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0); 247 + required_fault = 248 + hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0); 235 249 if (required_fault) 236 250 goto fault; 237 - *pfn = range->values[HMM_PFN_NONE]; 251 + *hmm_pfn = 0; 238 252 return 0; 239 253 } 240 254 ··· 246 260 * the PFN even if not present. 247 261 */ 248 262 if (hmm_is_device_private_entry(range, entry)) { 249 - *pfn = hmm_device_entry_from_pfn(range, 250 - device_private_entry_to_pfn(entry)); 251 - *pfn |= range->flags[HMM_PFN_VALID]; 263 + cpu_flags = HMM_PFN_VALID; 252 264 if (is_write_device_private_entry(entry)) 253 - *pfn |= range->flags[HMM_PFN_WRITE]; 265 + cpu_flags |= HMM_PFN_WRITE; 266 + *hmm_pfn = device_private_entry_to_pfn(entry) | 267 + cpu_flags; 254 268 return 0; 255 269 } 256 270 257 - required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0); 271 + required_fault = 272 + hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0); 258 273 if (!required_fault) { 259 - *pfn = range->values[HMM_PFN_NONE]; 274 + *hmm_pfn = 0; 260 275 return 0; 261 276 } 262 277 ··· 277 290 } 278 291 279 292 cpu_flags = pte_to_hmm_pfn_flags(range, pte); 280 - required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags); 293 + required_fault = 294 + hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags); 281 295 if (required_fault) 282 296 goto fault; 283 297 ··· 287 299 * fall through and treat it like a normal page. 
288 300 */ 289 301 if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) { 290 - if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) { 302 + if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) { 291 303 pte_unmap(ptep); 292 304 return -EFAULT; 293 305 } 294 - *pfn = range->values[HMM_PFN_SPECIAL]; 306 + *hmm_pfn = HMM_PFN_ERROR; 295 307 return 0; 296 308 } 297 309 298 - *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags; 310 + *hmm_pfn = pte_pfn(pte) | cpu_flags; 299 311 return 0; 300 312 301 313 fault: ··· 311 323 { 312 324 struct hmm_vma_walk *hmm_vma_walk = walk->private; 313 325 struct hmm_range *range = hmm_vma_walk->range; 314 - uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT]; 326 + unsigned long *hmm_pfns = 327 + &range->hmm_pfns[(start - range->start) >> PAGE_SHIFT]; 315 328 unsigned long npages = (end - start) >> PAGE_SHIFT; 316 329 unsigned long addr = start; 317 330 pte_t *ptep; ··· 324 335 return hmm_vma_walk_hole(start, end, -1, walk); 325 336 326 337 if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { 327 - if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) { 338 + if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) { 328 339 hmm_vma_walk->last = addr; 329 340 pmd_migration_entry_wait(walk->mm, pmdp); 330 341 return -EBUSY; 331 342 } 332 - return hmm_pfns_fill(start, end, range, HMM_PFN_NONE); 343 + return hmm_pfns_fill(start, end, range, 0); 333 344 } 334 345 335 346 if (!pmd_present(pmd)) { 336 - if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) 347 + if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) 337 348 return -EFAULT; 338 349 return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); 339 350 } ··· 353 364 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) 354 365 goto again; 355 366 356 - return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd); 367 + return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd); 357 368 } 358 369 359 370 /* ··· 363 374 * recover. 364 375 */ 365 376 if (pmd_bad(pmd)) { 366 - if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) 377 + if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) 367 378 return -EFAULT; 368 379 return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); 369 380 } 370 381 371 382 ptep = pte_offset_map(pmdp, addr); 372 - for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) { 383 + for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) { 373 384 int r; 374 385 375 - r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns); 386 + r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns); 376 387 if (r) { 377 388 /* hmm_vma_handle_pte() did pte_unmap() */ 378 - hmm_vma_walk->last = addr; 379 389 return r; 380 390 } 381 391 } 382 392 pte_unmap(ptep - 1); 383 - 384 - hmm_vma_walk->last = addr; 385 393 return 0; 386 394 } 387 395 388 396 #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \ 389 397 defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) 390 - static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud) 398 + static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range, 399 + pud_t pud) 391 400 { 392 401 if (!pud_present(pud)) 393 402 return 0; 394 - return pud_write(pud) ? range->flags[HMM_PFN_VALID] | 395 - range->flags[HMM_PFN_WRITE] : 396 - range->flags[HMM_PFN_VALID]; 403 + return pud_write(pud) ? 
(HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID; 397 404 } 398 405 399 406 static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, ··· 417 432 if (pud_huge(pud) && pud_devmap(pud)) { 418 433 unsigned long i, npages, pfn; 419 434 unsigned int required_fault; 420 - uint64_t *pfns, cpu_flags; 435 + unsigned long *hmm_pfns; 436 + unsigned long cpu_flags; 421 437 422 438 if (!pud_present(pud)) { 423 439 spin_unlock(ptl); ··· 427 441 428 442 i = (addr - range->start) >> PAGE_SHIFT; 429 443 npages = (end - addr) >> PAGE_SHIFT; 430 - pfns = &range->pfns[i]; 444 + hmm_pfns = &range->hmm_pfns[i]; 431 445 432 446 cpu_flags = pud_to_hmm_pfn_flags(range, pud); 433 - required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, 447 + required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns, 434 448 npages, cpu_flags); 435 449 if (required_fault) { 436 450 spin_unlock(ptl); ··· 439 453 440 454 pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); 441 455 for (i = 0; i < npages; ++i, ++pfn) 442 - pfns[i] = hmm_device_entry_from_pfn(range, pfn) | 443 - cpu_flags; 444 - hmm_vma_walk->last = end; 456 + hmm_pfns[i] = pfn | cpu_flags; 445 457 goto out_unlock; 446 458 } 447 459 ··· 463 479 struct hmm_vma_walk *hmm_vma_walk = walk->private; 464 480 struct hmm_range *range = hmm_vma_walk->range; 465 481 struct vm_area_struct *vma = walk->vma; 466 - uint64_t orig_pfn, cpu_flags; 467 482 unsigned int required_fault; 483 + unsigned long pfn_req_flags; 484 + unsigned long cpu_flags; 468 485 spinlock_t *ptl; 469 486 pte_t entry; 470 487 ··· 473 488 entry = huge_ptep_get(pte); 474 489 475 490 i = (start - range->start) >> PAGE_SHIFT; 476 - orig_pfn = range->pfns[i]; 491 + pfn_req_flags = range->hmm_pfns[i]; 477 492 cpu_flags = pte_to_hmm_pfn_flags(range, entry); 478 - required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags); 493 + required_fault = 494 + hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags); 479 495 if (required_fault) { 480 496 spin_unlock(ptl); 481 497 return hmm_vma_fault(addr, end, required_fault, walk); ··· 484 498 485 499 pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT); 486 500 for (; addr < end; addr += PAGE_SIZE, i++, pfn++) 487 - range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | 488 - cpu_flags; 489 - hmm_vma_walk->last = end; 501 + range->hmm_pfns[i] = pfn | cpu_flags; 502 + 490 503 spin_unlock(ptl); 491 504 return 0; 492 505 } ··· 516 531 * failure. 517 532 */ 518 533 if (hmm_range_need_fault(hmm_vma_walk, 519 - range->pfns + 534 + range->hmm_pfns + 520 535 ((start - range->start) >> PAGE_SHIFT), 521 536 (end - start) >> PAGE_SHIFT, 0)) 522 537 return -EFAULT; 523 538 524 539 hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); 525 - hmm_vma_walk->last = end; 526 540 527 541 /* Skip this vma and continue processing the next vma. */ 528 542 return 1; ··· 539 555 * hmm_range_fault - try to fault some address in a virtual address range 540 556 * @range: argument structure 541 557 * 542 - * Return: the number of valid pages in range->pfns[] (from range start 543 - * address), which may be zero. On error one of the following status codes 544 - * can be returned: 558 + * Returns 0 on success or one of the following error codes: 545 559 * 546 560 * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma 547 561 * (e.g., device file vma). ··· 554 572 * This is similar to get_user_pages(), except that it can read the page tables 555 573 * without mutating them (ie causing faults). 
556 574 */ 557 - long hmm_range_fault(struct hmm_range *range) 575 + int hmm_range_fault(struct hmm_range *range) 558 576 { 559 577 struct hmm_vma_walk hmm_vma_walk = { 560 578 .range = range, ··· 572 590 return -EBUSY; 573 591 ret = walk_page_range(mm, hmm_vma_walk.last, range->end, 574 592 &hmm_walk_ops, &hmm_vma_walk); 593 + /* 594 + * When -EBUSY is returned the loop restarts with 595 + * hmm_vma_walk.last set to an address that has not been stored 596 + * in pfns. All entries < last in the pfn array are set to their 597 + * output, and all >= are still at their input values. 598 + */ 575 599 } while (ret == -EBUSY); 576 - 577 - if (ret) 578 - return ret; 579 - return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT; 600 + return ret; 580 601 } 581 602 EXPORT_SYMBOL(hmm_range_fault);
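With the return value reduced to 0 or -errno, everything a driver needs now comes back in range->hmm_pfns[]: each entry is the pfn with the HMM_PFN_VALID/HMM_PFN_WRITE bits ORed in, as the walkers above show. A minimal caller-side sketch (not from the series) of consuming that output after a successful call:

	/* Count how many pages of a faulted range came back writable. */
	static unsigned long hmm_pfns_count_writable(const struct hmm_range *range)
	{
		unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
		unsigned long i, nr = 0;

		for (i = 0; i < npages; i++)
			if (range->hmm_pfns[i] & HMM_PFN_WRITE)
				nr++;
		return nr;
	}

The do/while loop above retries -EBUSY from the page walk internally (for example while waiting on a migration entry), so a 0 return means every entry in hmm_pfns[] holds output, as the new comment in the retry loop spells out.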
+1
tools/testing/selftests/vm/.gitignore
··· 17 17 va_128TBswitch 18 18 map_fixed_noreplace 19 19 write_to_hugetlbfs 20 + hmm-tests
+3
tools/testing/selftests/vm/Makefile
··· 7 7 LDLIBS = -lrt 8 8 TEST_GEN_FILES = compaction_test 9 9 TEST_GEN_FILES += gup_benchmark 10 + TEST_GEN_FILES += hmm-tests 10 11 TEST_GEN_FILES += hugepage-mmap 11 12 TEST_GEN_FILES += hugepage-shm 12 13 TEST_GEN_FILES += map_hugetlb ··· 33 32 34 33 KSFT_KHDR_INSTALL := 1 35 34 include ../lib.mk 35 + 36 + $(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread 36 37 37 38 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread 38 39
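Note the new link dependencies: hmm-tests pulls in -lhugetlbfs for the anon_write_hugetlbfs test and -lpthread for the anon_teardown test further down, so building the vm selftests now needs the libhugetlbfs development files installed.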
+2
tools/testing/selftests/vm/config
··· 1 1 CONFIG_SYSVIPC=y 2 2 CONFIG_USERFAULTFD=y 3 3 CONFIG_TEST_VMALLOC=m 4 + CONFIG_DEVICE_PRIVATE=y 5 + CONFIG_TEST_HMM=m
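The config fragment additions match what the test needs at runtime: CONFIG_DEVICE_PRIVATE=y provides the ZONE_DEVICE private memory that the migration tests exercise, and CONFIG_TEST_HMM=m builds the mirror driver as a module so test_hmm.sh can load and unload it around each run.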
+1359
tools/testing/selftests/vm/hmm-tests.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * HMM stands for Heterogeneous Memory Management, it is a helper layer inside 4 + * the linux kernel to help device drivers mirror a process address space in 5 + * the device. This allows the device to use the same address space which 6 + * makes communication and data exchange a lot easier. 7 + * 8 + * This framework's sole purpose is to exercise various code paths inside 9 + * the kernel to make sure that HMM performs as expected and to flush out any 10 + * bugs. 11 + */ 12 + 13 + #include "../kselftest_harness.h" 14 + 15 + #include <errno.h> 16 + #include <fcntl.h> 17 + #include <stdio.h> 18 + #include <stdlib.h> 19 + #include <stdint.h> 20 + #include <unistd.h> 21 + #include <strings.h> 22 + #include <time.h> 23 + #include <pthread.h> 24 + #include <hugetlbfs.h> 25 + #include <sys/types.h> 26 + #include <sys/stat.h> 27 + #include <sys/mman.h> 28 + #include <sys/ioctl.h> 29 + 30 + /* 31 + * This is a private UAPI to the kernel test module so it isn't exported 32 + * in the usual include/uapi/... directory. 33 + */ 34 + #include "../../../../lib/test_hmm_uapi.h" 35 + 36 + struct hmm_buffer { 37 + void *ptr; 38 + void *mirror; 39 + unsigned long size; 40 + int fd; 41 + uint64_t cpages; 42 + uint64_t faults; 43 + }; 44 + 45 + #define TWOMEG (1 << 21) 46 + #define HMM_BUFFER_SIZE (1024 << 12) 47 + #define HMM_PATH_MAX 64 48 + #define NTIMES 256 49 + 50 + #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) 51 + 52 + FIXTURE(hmm) 53 + { 54 + int fd; 55 + unsigned int page_size; 56 + unsigned int page_shift; 57 + }; 58 + 59 + FIXTURE(hmm2) 60 + { 61 + int fd0; 62 + int fd1; 63 + unsigned int page_size; 64 + unsigned int page_shift; 65 + }; 66 + 67 + static int hmm_open(int unit) 68 + { 69 + char pathname[HMM_PATH_MAX]; 70 + int fd; 71 + 72 + snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); 73 + fd = open(pathname, O_RDWR, 0); 74 + if (fd < 0) 75 + fprintf(stderr, "could not open hmm dmirror driver (%s)\n", 76 + pathname); 77 + return fd; 78 + } 79 + 80 + FIXTURE_SETUP(hmm) 81 + { 82 + self->page_size = sysconf(_SC_PAGE_SIZE); 83 + self->page_shift = ffs(self->page_size) - 1; 84 + 85 + self->fd = hmm_open(0); 86 + ASSERT_GE(self->fd, 0); 87 + } 88 + 89 + FIXTURE_SETUP(hmm2) 90 + { 91 + self->page_size = sysconf(_SC_PAGE_SIZE); 92 + self->page_shift = ffs(self->page_size) - 1; 93 + 94 + self->fd0 = hmm_open(0); 95 + ASSERT_GE(self->fd0, 0); 96 + self->fd1 = hmm_open(1); 97 + ASSERT_GE(self->fd1, 0); 98 + } 99 + 100 + FIXTURE_TEARDOWN(hmm) 101 + { 102 + int ret = close(self->fd); 103 + 104 + ASSERT_EQ(ret, 0); 105 + self->fd = -1; 106 + } 107 + 108 + FIXTURE_TEARDOWN(hmm2) 109 + { 110 + int ret = close(self->fd0); 111 + 112 + ASSERT_EQ(ret, 0); 113 + self->fd0 = -1; 114 + 115 + ret = close(self->fd1); 116 + ASSERT_EQ(ret, 0); 117 + self->fd1 = -1; 118 + } 119 + 120 + static int hmm_dmirror_cmd(int fd, 121 + unsigned long request, 122 + struct hmm_buffer *buffer, 123 + unsigned long npages) 124 + { 125 + struct hmm_dmirror_cmd cmd; 126 + int ret; 127 + 128 + /* Simulate a device reading system memory. 
*/ 129 + cmd.addr = (__u64)buffer->ptr; 130 + cmd.ptr = (__u64)buffer->mirror; 131 + cmd.npages = npages; 132 + 133 + for (;;) { 134 + ret = ioctl(fd, request, &cmd); 135 + if (ret == 0) 136 + break; 137 + if (errno == EINTR) 138 + continue; 139 + return -errno; 140 + } 141 + buffer->cpages = cmd.cpages; 142 + buffer->faults = cmd.faults; 143 + 144 + return 0; 145 + } 146 + 147 + static void hmm_buffer_free(struct hmm_buffer *buffer) 148 + { 149 + if (buffer == NULL) 150 + return; 151 + 152 + if (buffer->ptr) 153 + munmap(buffer->ptr, buffer->size); 154 + free(buffer->mirror); 155 + free(buffer); 156 + } 157 + 158 + /* 159 + * Create a temporary file that will be deleted on close. 160 + */ 161 + static int hmm_create_file(unsigned long size) 162 + { 163 + char path[HMM_PATH_MAX]; 164 + int fd; 165 + 166 + strcpy(path, "/tmp"); 167 + fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); 168 + if (fd >= 0) { 169 + int r; 170 + 171 + do { 172 + r = ftruncate(fd, size); 173 + } while (r == -1 && errno == EINTR); 174 + if (!r) 175 + return fd; 176 + close(fd); 177 + } 178 + return -1; 179 + } 180 + 181 + /* 182 + * Return a random unsigned number. 183 + */ 184 + static unsigned int hmm_random(void) 185 + { 186 + static int fd = -1; 187 + unsigned int r; 188 + 189 + if (fd < 0) { 190 + fd = open("/dev/urandom", O_RDONLY); 191 + if (fd < 0) { 192 + fprintf(stderr, "%s:%d failed to open /dev/urandom\n", 193 + __FILE__, __LINE__); 194 + return ~0U; 195 + } 196 + } 197 + read(fd, &r, sizeof(r)); 198 + return r; 199 + } 200 + 201 + static void hmm_nanosleep(unsigned int n) 202 + { 203 + struct timespec t; 204 + 205 + t.tv_sec = 0; 206 + t.tv_nsec = n; 207 + nanosleep(&t, NULL); 208 + } 209 + 210 + /* 211 + * Simple NULL test of device open/close. 212 + */ 213 + TEST_F(hmm, open_close) 214 + { 215 + } 216 + 217 + /* 218 + * Read private anonymous memory. 219 + */ 220 + TEST_F(hmm, anon_read) 221 + { 222 + struct hmm_buffer *buffer; 223 + unsigned long npages; 224 + unsigned long size; 225 + unsigned long i; 226 + int *ptr; 227 + int ret; 228 + int val; 229 + 230 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 231 + ASSERT_NE(npages, 0); 232 + size = npages << self->page_shift; 233 + 234 + buffer = malloc(sizeof(*buffer)); 235 + ASSERT_NE(buffer, NULL); 236 + 237 + buffer->fd = -1; 238 + buffer->size = size; 239 + buffer->mirror = malloc(size); 240 + ASSERT_NE(buffer->mirror, NULL); 241 + 242 + buffer->ptr = mmap(NULL, size, 243 + PROT_READ | PROT_WRITE, 244 + MAP_PRIVATE | MAP_ANONYMOUS, 245 + buffer->fd, 0); 246 + ASSERT_NE(buffer->ptr, MAP_FAILED); 247 + 248 + /* 249 + * Initialize buffer in system memory but leave the first two pages 250 + * zero (pte_none and pfn_zero). 251 + */ 252 + i = 2 * self->page_size / sizeof(*ptr); 253 + for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 254 + ptr[i] = i; 255 + 256 + /* Set buffer permission to read-only. */ 257 + ret = mprotect(buffer->ptr, size, PROT_READ); 258 + ASSERT_EQ(ret, 0); 259 + 260 + /* Populate the CPU page table with a special zero page. */ 261 + val = *(int *)(buffer->ptr + self->page_size); 262 + ASSERT_EQ(val, 0); 263 + 264 + /* Simulate a device reading system memory. */ 265 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 266 + ASSERT_EQ(ret, 0); 267 + ASSERT_EQ(buffer->cpages, npages); 268 + ASSERT_EQ(buffer->faults, 1); 269 + 270 + /* Check what the device read. 
*/ 271 + ptr = buffer->mirror; 272 + for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) 273 + ASSERT_EQ(ptr[i], 0); 274 + for (; i < size / sizeof(*ptr); ++i) 275 + ASSERT_EQ(ptr[i], i); 276 + 277 + hmm_buffer_free(buffer); 278 + } 279 + 280 + /* 281 + * Read private anonymous memory which has been protected with 282 + * mprotect() PROT_NONE. 283 + */ 284 + TEST_F(hmm, anon_read_prot) 285 + { 286 + struct hmm_buffer *buffer; 287 + unsigned long npages; 288 + unsigned long size; 289 + unsigned long i; 290 + int *ptr; 291 + int ret; 292 + 293 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 294 + ASSERT_NE(npages, 0); 295 + size = npages << self->page_shift; 296 + 297 + buffer = malloc(sizeof(*buffer)); 298 + ASSERT_NE(buffer, NULL); 299 + 300 + buffer->fd = -1; 301 + buffer->size = size; 302 + buffer->mirror = malloc(size); 303 + ASSERT_NE(buffer->mirror, NULL); 304 + 305 + buffer->ptr = mmap(NULL, size, 306 + PROT_READ | PROT_WRITE, 307 + MAP_PRIVATE | MAP_ANONYMOUS, 308 + buffer->fd, 0); 309 + ASSERT_NE(buffer->ptr, MAP_FAILED); 310 + 311 + /* Initialize buffer in system memory. */ 312 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 313 + ptr[i] = i; 314 + 315 + /* Initialize mirror buffer so we can verify it isn't written. */ 316 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 317 + ptr[i] = -i; 318 + 319 + /* Protect buffer from reading. */ 320 + ret = mprotect(buffer->ptr, size, PROT_NONE); 321 + ASSERT_EQ(ret, 0); 322 + 323 + /* Simulate a device reading system memory. */ 324 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 325 + ASSERT_EQ(ret, -EFAULT); 326 + 327 + /* Allow CPU to read the buffer so we can check it. */ 328 + ret = mprotect(buffer->ptr, size, PROT_READ); 329 + ASSERT_EQ(ret, 0); 330 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 331 + ASSERT_EQ(ptr[i], i); 332 + 333 + /* Check what the device read. */ 334 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 335 + ASSERT_EQ(ptr[i], -i); 336 + 337 + hmm_buffer_free(buffer); 338 + } 339 + 340 + /* 341 + * Write private anonymous memory. 342 + */ 343 + TEST_F(hmm, anon_write) 344 + { 345 + struct hmm_buffer *buffer; 346 + unsigned long npages; 347 + unsigned long size; 348 + unsigned long i; 349 + int *ptr; 350 + int ret; 351 + 352 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 353 + ASSERT_NE(npages, 0); 354 + size = npages << self->page_shift; 355 + 356 + buffer = malloc(sizeof(*buffer)); 357 + ASSERT_NE(buffer, NULL); 358 + 359 + buffer->fd = -1; 360 + buffer->size = size; 361 + buffer->mirror = malloc(size); 362 + ASSERT_NE(buffer->mirror, NULL); 363 + 364 + buffer->ptr = mmap(NULL, size, 365 + PROT_READ | PROT_WRITE, 366 + MAP_PRIVATE | MAP_ANONYMOUS, 367 + buffer->fd, 0); 368 + ASSERT_NE(buffer->ptr, MAP_FAILED); 369 + 370 + /* Initialize data that the device will write to buffer->ptr. */ 371 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 372 + ptr[i] = i; 373 + 374 + /* Simulate a device writing system memory. */ 375 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 376 + ASSERT_EQ(ret, 0); 377 + ASSERT_EQ(buffer->cpages, npages); 378 + ASSERT_EQ(buffer->faults, 1); 379 + 380 + /* Check what the device wrote. 
*/ 381 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 382 + ASSERT_EQ(ptr[i], i); 383 + 384 + hmm_buffer_free(buffer); 385 + } 386 + 387 + /* 388 + * Write private anonymous memory which has been protected with 389 + * mprotect() PROT_READ. 390 + */ 391 + TEST_F(hmm, anon_write_prot) 392 + { 393 + struct hmm_buffer *buffer; 394 + unsigned long npages; 395 + unsigned long size; 396 + unsigned long i; 397 + int *ptr; 398 + int ret; 399 + 400 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 401 + ASSERT_NE(npages, 0); 402 + size = npages << self->page_shift; 403 + 404 + buffer = malloc(sizeof(*buffer)); 405 + ASSERT_NE(buffer, NULL); 406 + 407 + buffer->fd = -1; 408 + buffer->size = size; 409 + buffer->mirror = malloc(size); 410 + ASSERT_NE(buffer->mirror, NULL); 411 + 412 + buffer->ptr = mmap(NULL, size, 413 + PROT_READ, 414 + MAP_PRIVATE | MAP_ANONYMOUS, 415 + buffer->fd, 0); 416 + ASSERT_NE(buffer->ptr, MAP_FAILED); 417 + 418 + /* Simulate a device reading a zero page of memory. */ 419 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); 420 + ASSERT_EQ(ret, 0); 421 + ASSERT_EQ(buffer->cpages, 1); 422 + ASSERT_EQ(buffer->faults, 1); 423 + 424 + /* Initialize data that the device will write to buffer->ptr. */ 425 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 426 + ptr[i] = i; 427 + 428 + /* Simulate a device writing system memory. */ 429 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 430 + ASSERT_EQ(ret, -EPERM); 431 + 432 + /* Check what the device wrote. */ 433 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 434 + ASSERT_EQ(ptr[i], 0); 435 + 436 + /* Now allow writing and see that the zero page is replaced. */ 437 + ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); 438 + ASSERT_EQ(ret, 0); 439 + 440 + /* Simulate a device writing system memory. */ 441 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 442 + ASSERT_EQ(ret, 0); 443 + ASSERT_EQ(buffer->cpages, npages); 444 + ASSERT_EQ(buffer->faults, 1); 445 + 446 + /* Check what the device wrote. */ 447 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 448 + ASSERT_EQ(ptr[i], i); 449 + 450 + hmm_buffer_free(buffer); 451 + } 452 + 453 + /* 454 + * Check that a device writing an anonymous private mapping 455 + * will copy-on-write if a child process inherits the mapping. 456 + */ 457 + TEST_F(hmm, anon_write_child) 458 + { 459 + struct hmm_buffer *buffer; 460 + unsigned long npages; 461 + unsigned long size; 462 + unsigned long i; 463 + int *ptr; 464 + pid_t pid; 465 + int child_fd; 466 + int ret; 467 + 468 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 469 + ASSERT_NE(npages, 0); 470 + size = npages << self->page_shift; 471 + 472 + buffer = malloc(sizeof(*buffer)); 473 + ASSERT_NE(buffer, NULL); 474 + 475 + buffer->fd = -1; 476 + buffer->size = size; 477 + buffer->mirror = malloc(size); 478 + ASSERT_NE(buffer->mirror, NULL); 479 + 480 + buffer->ptr = mmap(NULL, size, 481 + PROT_READ | PROT_WRITE, 482 + MAP_PRIVATE | MAP_ANONYMOUS, 483 + buffer->fd, 0); 484 + ASSERT_NE(buffer->ptr, MAP_FAILED); 485 + 486 + /* Initialize buffer->ptr so we can tell if it is written. */ 487 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 488 + ptr[i] = i; 489 + 490 + /* Initialize data that the device will write to buffer->ptr. 
*/ 491 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 492 + ptr[i] = -i; 493 + 494 + pid = fork(); 495 + if (pid == -1) 496 + ASSERT_EQ(pid, 0); 497 + if (pid != 0) { 498 + waitpid(pid, &ret, 0); 499 + ASSERT_EQ(WIFEXITED(ret), 1); 500 + 501 + /* Check that the parent's buffer did not change. */ 502 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 503 + ASSERT_EQ(ptr[i], i); 504 + return; 505 + } 506 + 507 + /* Check that we see the parent's values. */ 508 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 509 + ASSERT_EQ(ptr[i], i); 510 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 511 + ASSERT_EQ(ptr[i], -i); 512 + 513 + /* The child process needs its own mirror to its own mm. */ 514 + child_fd = hmm_open(0); 515 + ASSERT_GE(child_fd, 0); 516 + 517 + /* Simulate a device writing system memory. */ 518 + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); 519 + ASSERT_EQ(ret, 0); 520 + ASSERT_EQ(buffer->cpages, npages); 521 + ASSERT_EQ(buffer->faults, 1); 522 + 523 + /* Check what the device wrote. */ 524 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 525 + ASSERT_EQ(ptr[i], -i); 526 + 527 + close(child_fd); 528 + exit(0); 529 + } 530 + 531 + /* 532 + * Check that a device writing an anonymous shared mapping 533 + * will not copy-on-write if a child process inherits the mapping. 534 + */ 535 + TEST_F(hmm, anon_write_child_shared) 536 + { 537 + struct hmm_buffer *buffer; 538 + unsigned long npages; 539 + unsigned long size; 540 + unsigned long i; 541 + int *ptr; 542 + pid_t pid; 543 + int child_fd; 544 + int ret; 545 + 546 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 547 + ASSERT_NE(npages, 0); 548 + size = npages << self->page_shift; 549 + 550 + buffer = malloc(sizeof(*buffer)); 551 + ASSERT_NE(buffer, NULL); 552 + 553 + buffer->fd = -1; 554 + buffer->size = size; 555 + buffer->mirror = malloc(size); 556 + ASSERT_NE(buffer->mirror, NULL); 557 + 558 + buffer->ptr = mmap(NULL, size, 559 + PROT_READ | PROT_WRITE, 560 + MAP_SHARED | MAP_ANONYMOUS, 561 + buffer->fd, 0); 562 + ASSERT_NE(buffer->ptr, MAP_FAILED); 563 + 564 + /* Initialize buffer->ptr so we can tell if it is written. */ 565 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 566 + ptr[i] = i; 567 + 568 + /* Initialize data that the device will write to buffer->ptr. */ 569 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 570 + ptr[i] = -i; 571 + 572 + pid = fork(); 573 + if (pid == -1) 574 + ASSERT_EQ(pid, 0); 575 + if (pid != 0) { 576 + waitpid(pid, &ret, 0); 577 + ASSERT_EQ(WIFEXITED(ret), 1); 578 + 579 + /* Check that the parent's buffer did change. */ 580 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 581 + ASSERT_EQ(ptr[i], -i); 582 + return; 583 + } 584 + 585 + /* Check that we see the parent's values. */ 586 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 587 + ASSERT_EQ(ptr[i], i); 588 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 589 + ASSERT_EQ(ptr[i], -i); 590 + 591 + /* The child process needs its own mirror to its own mm. */ 592 + child_fd = hmm_open(0); 593 + ASSERT_GE(child_fd, 0); 594 + 595 + /* Simulate a device writing system memory. */ 596 + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); 597 + ASSERT_EQ(ret, 0); 598 + ASSERT_EQ(buffer->cpages, npages); 599 + ASSERT_EQ(buffer->faults, 1); 600 + 601 + /* Check what the device wrote. 
*/ 602 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 603 + ASSERT_EQ(ptr[i], -i); 604 + 605 + close(child_fd); 606 + exit(0); 607 + } 608 + 609 + /* 610 + * Write private anonymous huge page. 611 + */ 612 + TEST_F(hmm, anon_write_huge) 613 + { 614 + struct hmm_buffer *buffer; 615 + unsigned long npages; 616 + unsigned long size; 617 + unsigned long i; 618 + void *old_ptr; 619 + void *map; 620 + int *ptr; 621 + int ret; 622 + 623 + size = 2 * TWOMEG; 624 + 625 + buffer = malloc(sizeof(*buffer)); 626 + ASSERT_NE(buffer, NULL); 627 + 628 + buffer->fd = -1; 629 + buffer->size = size; 630 + buffer->mirror = malloc(size); 631 + ASSERT_NE(buffer->mirror, NULL); 632 + 633 + buffer->ptr = mmap(NULL, size, 634 + PROT_READ | PROT_WRITE, 635 + MAP_PRIVATE | MAP_ANONYMOUS, 636 + buffer->fd, 0); 637 + ASSERT_NE(buffer->ptr, MAP_FAILED); 638 + 639 + size = TWOMEG; 640 + npages = size >> self->page_shift; 641 + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); 642 + ret = madvise(map, size, MADV_HUGEPAGE); 643 + ASSERT_EQ(ret, 0); 644 + old_ptr = buffer->ptr; 645 + buffer->ptr = map; 646 + 647 + /* Initialize data that the device will write to buffer->ptr. */ 648 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 649 + ptr[i] = i; 650 + 651 + /* Simulate a device writing system memory. */ 652 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 653 + ASSERT_EQ(ret, 0); 654 + ASSERT_EQ(buffer->cpages, npages); 655 + ASSERT_EQ(buffer->faults, 1); 656 + 657 + /* Check what the device wrote. */ 658 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 659 + ASSERT_EQ(ptr[i], i); 660 + 661 + buffer->ptr = old_ptr; 662 + hmm_buffer_free(buffer); 663 + } 664 + 665 + /* 666 + * Write huge TLBFS page. 667 + */ 668 + TEST_F(hmm, anon_write_hugetlbfs) 669 + { 670 + struct hmm_buffer *buffer; 671 + unsigned long npages; 672 + unsigned long size; 673 + unsigned long i; 674 + int *ptr; 675 + int ret; 676 + long pagesizes[4]; 677 + int n, idx; 678 + 679 + /* Skip test if we can't allocate a hugetlbfs page. */ 680 + 681 + n = gethugepagesizes(pagesizes, 4); 682 + if (n <= 0) 683 + return; 684 + for (idx = 0; --n > 0; ) { 685 + if (pagesizes[n] < pagesizes[idx]) 686 + idx = n; 687 + } 688 + size = ALIGN(TWOMEG, pagesizes[idx]); 689 + npages = size >> self->page_shift; 690 + 691 + buffer = malloc(sizeof(*buffer)); 692 + ASSERT_NE(buffer, NULL); 693 + 694 + buffer->ptr = get_hugepage_region(size, GHR_STRICT); 695 + if (buffer->ptr == NULL) { 696 + free(buffer); 697 + return; 698 + } 699 + 700 + buffer->fd = -1; 701 + buffer->size = size; 702 + buffer->mirror = malloc(size); 703 + ASSERT_NE(buffer->mirror, NULL); 704 + 705 + /* Initialize data that the device will write to buffer->ptr. */ 706 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 707 + ptr[i] = i; 708 + 709 + /* Simulate a device writing system memory. */ 710 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 711 + ASSERT_EQ(ret, 0); 712 + ASSERT_EQ(buffer->cpages, npages); 713 + ASSERT_EQ(buffer->faults, 1); 714 + 715 + /* Check what the device wrote. */ 716 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 717 + ASSERT_EQ(ptr[i], i); 718 + 719 + free_hugepage_region(buffer->ptr); 720 + buffer->ptr = NULL; 721 + hmm_buffer_free(buffer); 722 + } 723 + 724 + /* 725 + * Read mmap'ed file memory. 
726 + */ 727 + TEST_F(hmm, file_read) 728 + { 729 + struct hmm_buffer *buffer; 730 + unsigned long npages; 731 + unsigned long size; 732 + unsigned long i; 733 + int *ptr; 734 + int ret; 735 + int fd; 736 + ssize_t len; 737 + 738 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 739 + ASSERT_NE(npages, 0); 740 + size = npages << self->page_shift; 741 + 742 + fd = hmm_create_file(size); 743 + ASSERT_GE(fd, 0); 744 + 745 + buffer = malloc(sizeof(*buffer)); 746 + ASSERT_NE(buffer, NULL); 747 + 748 + buffer->fd = fd; 749 + buffer->size = size; 750 + buffer->mirror = malloc(size); 751 + ASSERT_NE(buffer->mirror, NULL); 752 + 753 + /* Write initial contents of the file. */ 754 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 755 + ptr[i] = i; 756 + len = pwrite(fd, buffer->mirror, size, 0); 757 + ASSERT_EQ(len, size); 758 + memset(buffer->mirror, 0, size); 759 + 760 + buffer->ptr = mmap(NULL, size, 761 + PROT_READ, 762 + MAP_SHARED, 763 + buffer->fd, 0); 764 + ASSERT_NE(buffer->ptr, MAP_FAILED); 765 + 766 + /* Simulate a device reading system memory. */ 767 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); 768 + ASSERT_EQ(ret, 0); 769 + ASSERT_EQ(buffer->cpages, npages); 770 + ASSERT_EQ(buffer->faults, 1); 771 + 772 + /* Check what the device read. */ 773 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 774 + ASSERT_EQ(ptr[i], i); 775 + 776 + hmm_buffer_free(buffer); 777 + } 778 + 779 + /* 780 + * Write mmap'ed file memory. 781 + */ 782 + TEST_F(hmm, file_write) 783 + { 784 + struct hmm_buffer *buffer; 785 + unsigned long npages; 786 + unsigned long size; 787 + unsigned long i; 788 + int *ptr; 789 + int ret; 790 + int fd; 791 + ssize_t len; 792 + 793 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 794 + ASSERT_NE(npages, 0); 795 + size = npages << self->page_shift; 796 + 797 + fd = hmm_create_file(size); 798 + ASSERT_GE(fd, 0); 799 + 800 + buffer = malloc(sizeof(*buffer)); 801 + ASSERT_NE(buffer, NULL); 802 + 803 + buffer->fd = fd; 804 + buffer->size = size; 805 + buffer->mirror = malloc(size); 806 + ASSERT_NE(buffer->mirror, NULL); 807 + 808 + buffer->ptr = mmap(NULL, size, 809 + PROT_READ | PROT_WRITE, 810 + MAP_SHARED, 811 + buffer->fd, 0); 812 + ASSERT_NE(buffer->ptr, MAP_FAILED); 813 + 814 + /* Initialize data that the device will write to buffer->ptr. */ 815 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 816 + ptr[i] = i; 817 + 818 + /* Simulate a device writing system memory. */ 819 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); 820 + ASSERT_EQ(ret, 0); 821 + ASSERT_EQ(buffer->cpages, npages); 822 + ASSERT_EQ(buffer->faults, 1); 823 + 824 + /* Check what the device wrote. */ 825 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 826 + ASSERT_EQ(ptr[i], i); 827 + 828 + /* Check that the device also wrote the file. */ 829 + len = pread(fd, buffer->mirror, size, 0); 830 + ASSERT_EQ(len, size); 831 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 832 + ASSERT_EQ(ptr[i], i); 833 + 834 + hmm_buffer_free(buffer); 835 + } 836 + 837 + /* 838 + * Migrate anonymous memory to device private memory. 
839 + */ 840 + TEST_F(hmm, migrate) 841 + { 842 + struct hmm_buffer *buffer; 843 + unsigned long npages; 844 + unsigned long size; 845 + unsigned long i; 846 + int *ptr; 847 + int ret; 848 + 849 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 850 + ASSERT_NE(npages, 0); 851 + size = npages << self->page_shift; 852 + 853 + buffer = malloc(sizeof(*buffer)); 854 + ASSERT_NE(buffer, NULL); 855 + 856 + buffer->fd = -1; 857 + buffer->size = size; 858 + buffer->mirror = malloc(size); 859 + ASSERT_NE(buffer->mirror, NULL); 860 + 861 + buffer->ptr = mmap(NULL, size, 862 + PROT_READ | PROT_WRITE, 863 + MAP_PRIVATE | MAP_ANONYMOUS, 864 + buffer->fd, 0); 865 + ASSERT_NE(buffer->ptr, MAP_FAILED); 866 + 867 + /* Initialize buffer in system memory. */ 868 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 869 + ptr[i] = i; 870 + 871 + /* Migrate memory to device. */ 872 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); 873 + ASSERT_EQ(ret, 0); 874 + ASSERT_EQ(buffer->cpages, npages); 875 + 876 + /* Check what the device read. */ 877 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 878 + ASSERT_EQ(ptr[i], i); 879 + 880 + hmm_buffer_free(buffer); 881 + } 882 + 883 + /* 884 + * Migrate anonymous memory to device private memory and fault it back to system 885 + * memory. 886 + */ 887 + TEST_F(hmm, migrate_fault) 888 + { 889 + struct hmm_buffer *buffer; 890 + unsigned long npages; 891 + unsigned long size; 892 + unsigned long i; 893 + int *ptr; 894 + int ret; 895 + 896 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 897 + ASSERT_NE(npages, 0); 898 + size = npages << self->page_shift; 899 + 900 + buffer = malloc(sizeof(*buffer)); 901 + ASSERT_NE(buffer, NULL); 902 + 903 + buffer->fd = -1; 904 + buffer->size = size; 905 + buffer->mirror = malloc(size); 906 + ASSERT_NE(buffer->mirror, NULL); 907 + 908 + buffer->ptr = mmap(NULL, size, 909 + PROT_READ | PROT_WRITE, 910 + MAP_PRIVATE | MAP_ANONYMOUS, 911 + buffer->fd, 0); 912 + ASSERT_NE(buffer->ptr, MAP_FAILED); 913 + 914 + /* Initialize buffer in system memory. */ 915 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 916 + ptr[i] = i; 917 + 918 + /* Migrate memory to device. */ 919 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); 920 + ASSERT_EQ(ret, 0); 921 + ASSERT_EQ(buffer->cpages, npages); 922 + 923 + /* Check what the device read. */ 924 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 925 + ASSERT_EQ(ptr[i], i); 926 + 927 + /* Fault pages back to system memory and check them. */ 928 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 929 + ASSERT_EQ(ptr[i], i); 930 + 931 + hmm_buffer_free(buffer); 932 + } 933 + 934 + /* 935 + * Try to migrate various memory types to device private memory. 936 + */ 937 + TEST_F(hmm2, migrate_mixed) 938 + { 939 + struct hmm_buffer *buffer; 940 + unsigned long npages; 941 + unsigned long size; 942 + int *ptr; 943 + unsigned char *p; 944 + int ret; 945 + int val; 946 + 947 + npages = 6; 948 + size = npages << self->page_shift; 949 + 950 + buffer = malloc(sizeof(*buffer)); 951 + ASSERT_NE(buffer, NULL); 952 + 953 + buffer->fd = -1; 954 + buffer->size = size; 955 + buffer->mirror = malloc(size); 956 + ASSERT_NE(buffer->mirror, NULL); 957 + 958 + /* Reserve a range of addresses. 
*/ 959 + buffer->ptr = mmap(NULL, size, 960 + PROT_NONE, 961 + MAP_PRIVATE | MAP_ANONYMOUS, 962 + buffer->fd, 0); 963 + ASSERT_NE(buffer->ptr, MAP_FAILED); 964 + p = buffer->ptr; 965 + 966 + /* Migrating a protected area should be an error. */ 967 + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages); 968 + ASSERT_EQ(ret, -EINVAL); 969 + 970 + /* Punch a hole after the first page address. */ 971 + ret = munmap(buffer->ptr + self->page_size, self->page_size); 972 + ASSERT_EQ(ret, 0); 973 + 974 + /* We expect an error if the vma doesn't cover the range. */ 975 + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3); 976 + ASSERT_EQ(ret, -EINVAL); 977 + 978 + /* Page 2 will be a read-only zero page. */ 979 + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, 980 + PROT_READ); 981 + ASSERT_EQ(ret, 0); 982 + ptr = (int *)(buffer->ptr + 2 * self->page_size); 983 + val = *ptr + 3; 984 + ASSERT_EQ(val, 3); 985 + 986 + /* Page 3 will be read-only. */ 987 + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 988 + PROT_READ | PROT_WRITE); 989 + ASSERT_EQ(ret, 0); 990 + ptr = (int *)(buffer->ptr + 3 * self->page_size); 991 + *ptr = val; 992 + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 993 + PROT_READ); 994 + ASSERT_EQ(ret, 0); 995 + 996 + /* Page 4-5 will be read-write. */ 997 + ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, 998 + PROT_READ | PROT_WRITE); 999 + ASSERT_EQ(ret, 0); 1000 + ptr = (int *)(buffer->ptr + 4 * self->page_size); 1001 + *ptr = val; 1002 + ptr = (int *)(buffer->ptr + 5 * self->page_size); 1003 + *ptr = val; 1004 + 1005 + /* Now try to migrate pages 2-5 to device 1. */ 1006 + buffer->ptr = p + 2 * self->page_size; 1007 + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4); 1008 + ASSERT_EQ(ret, 0); 1009 + ASSERT_EQ(buffer->cpages, 4); 1010 + 1011 + /* Page 5 won't be migrated to device 0 because it's on device 1. */ 1012 + buffer->ptr = p + 5 * self->page_size; 1013 + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); 1014 + ASSERT_EQ(ret, -ENOENT); 1015 + buffer->ptr = p; 1016 + 1017 + buffer->ptr = p; 1018 + hmm_buffer_free(buffer); 1019 + } 1020 + 1021 + /* 1022 + * Migrate anonymous memory to device private memory and fault it back to system 1023 + * memory multiple times. 1024 + */ 1025 + TEST_F(hmm, migrate_multiple) 1026 + { 1027 + struct hmm_buffer *buffer; 1028 + unsigned long npages; 1029 + unsigned long size; 1030 + unsigned long i; 1031 + unsigned long c; 1032 + int *ptr; 1033 + int ret; 1034 + 1035 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1036 + ASSERT_NE(npages, 0); 1037 + size = npages << self->page_shift; 1038 + 1039 + for (c = 0; c < NTIMES; c++) { 1040 + buffer = malloc(sizeof(*buffer)); 1041 + ASSERT_NE(buffer, NULL); 1042 + 1043 + buffer->fd = -1; 1044 + buffer->size = size; 1045 + buffer->mirror = malloc(size); 1046 + ASSERT_NE(buffer->mirror, NULL); 1047 + 1048 + buffer->ptr = mmap(NULL, size, 1049 + PROT_READ | PROT_WRITE, 1050 + MAP_PRIVATE | MAP_ANONYMOUS, 1051 + buffer->fd, 0); 1052 + ASSERT_NE(buffer->ptr, MAP_FAILED); 1053 + 1054 + /* Initialize buffer in system memory. */ 1055 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1056 + ptr[i] = i; 1057 + 1058 + /* Migrate memory to device. 
*/ 1059 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, 1060 + npages); 1061 + ASSERT_EQ(ret, 0); 1062 + ASSERT_EQ(buffer->cpages, npages); 1063 + 1064 + /* Check what the device read. */ 1065 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1066 + ASSERT_EQ(ptr[i], i); 1067 + 1068 + /* Fault pages back to system memory and check them. */ 1069 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1070 + ASSERT_EQ(ptr[i], i); 1071 + 1072 + hmm_buffer_free(buffer); 1073 + } 1074 + } 1075 + 1076 + /* 1077 + * Read anonymous memory multiple times. 1078 + */ 1079 + TEST_F(hmm, anon_read_multiple) 1080 + { 1081 + struct hmm_buffer *buffer; 1082 + unsigned long npages; 1083 + unsigned long size; 1084 + unsigned long i; 1085 + unsigned long c; 1086 + int *ptr; 1087 + int ret; 1088 + 1089 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1090 + ASSERT_NE(npages, 0); 1091 + size = npages << self->page_shift; 1092 + 1093 + for (c = 0; c < NTIMES; c++) { 1094 + buffer = malloc(sizeof(*buffer)); 1095 + ASSERT_NE(buffer, NULL); 1096 + 1097 + buffer->fd = -1; 1098 + buffer->size = size; 1099 + buffer->mirror = malloc(size); 1100 + ASSERT_NE(buffer->mirror, NULL); 1101 + 1102 + buffer->ptr = mmap(NULL, size, 1103 + PROT_READ | PROT_WRITE, 1104 + MAP_PRIVATE | MAP_ANONYMOUS, 1105 + buffer->fd, 0); 1106 + ASSERT_NE(buffer->ptr, MAP_FAILED); 1107 + 1108 + /* Initialize buffer in system memory. */ 1109 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1110 + ptr[i] = i + c; 1111 + 1112 + /* Simulate a device reading system memory. */ 1113 + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1114 + npages); 1115 + ASSERT_EQ(ret, 0); 1116 + ASSERT_EQ(buffer->cpages, npages); 1117 + ASSERT_EQ(buffer->faults, 1); 1118 + 1119 + /* Check what the device read. */ 1120 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1121 + ASSERT_EQ(ptr[i], i + c); 1122 + 1123 + hmm_buffer_free(buffer); 1124 + } 1125 + } 1126 + 1127 + void *unmap_buffer(void *p) 1128 + { 1129 + struct hmm_buffer *buffer = p; 1130 + 1131 + /* Delay for a bit and then unmap buffer while it is being read. */ 1132 + hmm_nanosleep(hmm_random() % 32000); 1133 + munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); 1134 + buffer->ptr = NULL; 1135 + 1136 + return NULL; 1137 + } 1138 + 1139 + /* 1140 + * Try reading anonymous memory while it is being unmapped. 1141 + */ 1142 + TEST_F(hmm, anon_teardown) 1143 + { 1144 + unsigned long npages; 1145 + unsigned long size; 1146 + unsigned long c; 1147 + void *ret; 1148 + 1149 + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; 1150 + ASSERT_NE(npages, 0); 1151 + size = npages << self->page_shift; 1152 + 1153 + for (c = 0; c < NTIMES; ++c) { 1154 + pthread_t thread; 1155 + struct hmm_buffer *buffer; 1156 + unsigned long i; 1157 + int *ptr; 1158 + int rc; 1159 + 1160 + buffer = malloc(sizeof(*buffer)); 1161 + ASSERT_NE(buffer, NULL); 1162 + 1163 + buffer->fd = -1; 1164 + buffer->size = size; 1165 + buffer->mirror = malloc(size); 1166 + ASSERT_NE(buffer->mirror, NULL); 1167 + 1168 + buffer->ptr = mmap(NULL, size, 1169 + PROT_READ | PROT_WRITE, 1170 + MAP_PRIVATE | MAP_ANONYMOUS, 1171 + buffer->fd, 0); 1172 + ASSERT_NE(buffer->ptr, MAP_FAILED); 1173 + 1174 + /* Initialize buffer in system memory. 
*/ 1175 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1176 + ptr[i] = i + c; 1177 + 1178 + rc = pthread_create(&thread, NULL, unmap_buffer, buffer); 1179 + ASSERT_EQ(rc, 0); 1180 + 1181 + /* Simulate a device reading system memory. */ 1182 + rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1183 + npages); 1184 + if (rc == 0) { 1185 + ASSERT_EQ(buffer->cpages, npages); 1186 + ASSERT_EQ(buffer->faults, 1); 1187 + 1188 + /* Check what the device read. */ 1189 + for (i = 0, ptr = buffer->mirror; 1190 + i < size / sizeof(*ptr); 1191 + ++i) 1192 + ASSERT_EQ(ptr[i], i + c); 1193 + } 1194 + 1195 + pthread_join(thread, &ret); 1196 + hmm_buffer_free(buffer); 1197 + } 1198 + } 1199 + 1200 + /* 1201 + * Test memory snapshot without faulting in pages accessed by the device. 1202 + */ 1203 + TEST_F(hmm2, snapshot) 1204 + { 1205 + struct hmm_buffer *buffer; 1206 + unsigned long npages; 1207 + unsigned long size; 1208 + int *ptr; 1209 + unsigned char *p; 1210 + unsigned char *m; 1211 + int ret; 1212 + int val; 1213 + 1214 + npages = 7; 1215 + size = npages << self->page_shift; 1216 + 1217 + buffer = malloc(sizeof(*buffer)); 1218 + ASSERT_NE(buffer, NULL); 1219 + 1220 + buffer->fd = -1; 1221 + buffer->size = size; 1222 + buffer->mirror = malloc(npages); 1223 + ASSERT_NE(buffer->mirror, NULL); 1224 + 1225 + /* Reserve a range of addresses. */ 1226 + buffer->ptr = mmap(NULL, size, 1227 + PROT_NONE, 1228 + MAP_PRIVATE | MAP_ANONYMOUS, 1229 + buffer->fd, 0); 1230 + ASSERT_NE(buffer->ptr, MAP_FAILED); 1231 + p = buffer->ptr; 1232 + 1233 + /* Punch a hole after the first page address. */ 1234 + ret = munmap(buffer->ptr + self->page_size, self->page_size); 1235 + ASSERT_EQ(ret, 0); 1236 + 1237 + /* Page 2 will be read-only zero page. */ 1238 + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, 1239 + PROT_READ); 1240 + ASSERT_EQ(ret, 0); 1241 + ptr = (int *)(buffer->ptr + 2 * self->page_size); 1242 + val = *ptr + 3; 1243 + ASSERT_EQ(val, 3); 1244 + 1245 + /* Page 3 will be read-only. */ 1246 + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1247 + PROT_READ | PROT_WRITE); 1248 + ASSERT_EQ(ret, 0); 1249 + ptr = (int *)(buffer->ptr + 3 * self->page_size); 1250 + *ptr = val; 1251 + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, 1252 + PROT_READ); 1253 + ASSERT_EQ(ret, 0); 1254 + 1255 + /* Page 4-6 will be read-write. */ 1256 + ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, 1257 + PROT_READ | PROT_WRITE); 1258 + ASSERT_EQ(ret, 0); 1259 + ptr = (int *)(buffer->ptr + 4 * self->page_size); 1260 + *ptr = val; 1261 + 1262 + /* Page 5 will be migrated to device 0. */ 1263 + buffer->ptr = p + 5 * self->page_size; 1264 + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); 1265 + ASSERT_EQ(ret, 0); 1266 + ASSERT_EQ(buffer->cpages, 1); 1267 + 1268 + /* Page 6 will be migrated to device 1. */ 1269 + buffer->ptr = p + 6 * self->page_size; 1270 + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1); 1271 + ASSERT_EQ(ret, 0); 1272 + ASSERT_EQ(buffer->cpages, 1); 1273 + 1274 + /* Simulate a device snapshotting CPU pagetables. */ 1275 + buffer->ptr = p; 1276 + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); 1277 + ASSERT_EQ(ret, 0); 1278 + ASSERT_EQ(buffer->cpages, npages); 1279 + 1280 + /* Check what the device saw. 
*/ 1281 + m = buffer->mirror; 1282 + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); 1283 + ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); 1284 + ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); 1285 + ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); 1286 + ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); 1287 + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | 1288 + HMM_DMIRROR_PROT_WRITE); 1289 + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); 1290 + 1291 + hmm_buffer_free(buffer); 1292 + } 1293 + 1294 + /* 1295 + * Test two devices reading the same memory (double mapped). 1296 + */ 1297 + TEST_F(hmm2, double_map) 1298 + { 1299 + struct hmm_buffer *buffer; 1300 + unsigned long npages; 1301 + unsigned long size; 1302 + unsigned long i; 1303 + int *ptr; 1304 + int ret; 1305 + 1306 + npages = 6; 1307 + size = npages << self->page_shift; 1308 + 1309 + buffer = malloc(sizeof(*buffer)); 1310 + ASSERT_NE(buffer, NULL); 1311 + 1312 + buffer->fd = -1; 1313 + buffer->size = size; 1314 + buffer->mirror = malloc(npages); 1315 + ASSERT_NE(buffer->mirror, NULL); 1316 + 1317 + /* Reserve a range of addresses. */ 1318 + buffer->ptr = mmap(NULL, size, 1319 + PROT_READ | PROT_WRITE, 1320 + MAP_PRIVATE | MAP_ANONYMOUS, 1321 + buffer->fd, 0); 1322 + ASSERT_NE(buffer->ptr, MAP_FAILED); 1323 + 1324 + /* Initialize buffer in system memory. */ 1325 + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) 1326 + ptr[i] = i; 1327 + 1328 + /* Make region read-only. */ 1329 + ret = mprotect(buffer->ptr, size, PROT_READ); 1330 + ASSERT_EQ(ret, 0); 1331 + 1332 + /* Simulate device 0 reading system memory. */ 1333 + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); 1334 + ASSERT_EQ(ret, 0); 1335 + ASSERT_EQ(buffer->cpages, npages); 1336 + ASSERT_EQ(buffer->faults, 1); 1337 + 1338 + /* Check what the device read. */ 1339 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1340 + ASSERT_EQ(ptr[i], i); 1341 + 1342 + /* Simulate device 1 reading system memory. */ 1343 + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); 1344 + ASSERT_EQ(ret, 0); 1345 + ASSERT_EQ(buffer->cpages, npages); 1346 + ASSERT_EQ(buffer->faults, 1); 1347 + 1348 + /* Check what the device read. */ 1349 + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) 1350 + ASSERT_EQ(ptr[i], i); 1351 + 1352 + /* Punch a hole after the first page address. */ 1353 + ret = munmap(buffer->ptr + self->page_size, self->page_size); 1354 + ASSERT_EQ(ret, 0); 1355 + 1356 + hmm_buffer_free(buffer); 1357 + } 1358 + 1359 + TEST_HARNESS_MAIN
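The snapshot assertions at the end are read against the HMM_DMIRROR_PROT_* values from the UAPI header above; as a hypothetical aid (not part of the selftest, and assuming that header is included) a helper naming the per-page byte HMM_DMIRROR_SNAPSHOT stores in buffer->mirror could look like:

	static const char *dmirror_prot_str(unsigned char p)
	{
		switch (p) {
		case HMM_DMIRROR_PROT_ERROR:
			return "no valid mirror PTE";
		case HMM_DMIRROR_PROT_NONE:
			return "unpopulated or no access";
		case HMM_DMIRROR_PROT_READ:
			return "read-only";
		case HMM_DMIRROR_PROT_WRITE:
			return "read/write";
		case HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ:
			return "zero page, read-only";
		case HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | HMM_DMIRROR_PROT_WRITE:
			return "device private on this device, writable";
		case HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE | HMM_DMIRROR_PROT_WRITE:
			return "device private on another device, writable";
		default:
			return "unknown";
		}
	}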
+16
tools/testing/selftests/vm/run_vmtests
··· 307 307 echo "[FAIL]" 308 308 exitcode=1 309 309 fi 310 + 311 + echo "running HMM smoke test" 312 + echo "------------------------------------" 313 + ./test_hmm.sh smoke 314 + ret_val=$? 315 + 316 + if [ $ret_val -eq 0 ]; then 317 + echo "[PASS]" 318 + elif [ $ret_val -eq $ksft_skip ]; then 319 + echo "[SKIP]" 320 + exitcode=$ksft_skip 321 + else 322 + echo "[FAIL]" 323 + exitcode=1 324 + fi 325 + 310 326 exit $exitcode
+97
tools/testing/selftests/vm/test_hmm.sh
··· 1 + #!/bin/bash
2 + # SPDX-License-Identifier: GPL-2.0
3 + #
4 + # Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
5 + #
6 + # This is a test script for the HMM (Heterogeneous Memory Management)
7 + # test driver. It is just a kernel module loader: it loads the test_hmm
8 + # module, creates the /dev/hmm_dmirror* character device nodes, runs the
9 + # hmm-tests selftest binary against them, and then unloads the module
10 + # again.
11 +
12 + TEST_NAME="test_hmm"
13 + DRIVER="test_hmm"
14 +
15 + # 1 if fails
16 + exitcode=1
17 +
18 + # Kselftest framework requirement - SKIP code is 4.
19 + ksft_skip=4
20 +
21 + check_test_requirements()
22 + {
23 + uid=$(id -u)
24 + if [ $uid -ne 0 ]; then
25 + echo "$0: Must be run as root"
26 + exit $ksft_skip
27 + fi
28 +
29 + if ! which modprobe > /dev/null 2>&1; then
30 + echo "$0: You need modprobe installed"
31 + exit $ksft_skip
32 + fi
33 +
34 + if ! modinfo $DRIVER > /dev/null 2>&1; then
35 + echo "$0: You must have the following enabled in your kernel:"
36 + echo "CONFIG_TEST_HMM=m"
37 + exit $ksft_skip
38 + fi
39 + }
40 +
41 + load_driver()
42 + {
43 + modprobe $DRIVER > /dev/null 2>&1
44 + if [ $? == 0 ]; then
45 + major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
46 + mknod /dev/hmm_dmirror0 c $major 0
47 + mknod /dev/hmm_dmirror1 c $major 1
48 + fi
49 + }
50 +
51 + unload_driver()
52 + {
53 + modprobe -r $DRIVER > /dev/null 2>&1
54 + rm -f /dev/hmm_dmirror?
55 + }
56 +
57 + run_smoke()
58 + {
59 + echo "Running smoke test. Note, this test provides basic coverage."
60 +
61 + load_driver
62 + $(dirname "${BASH_SOURCE[0]}")/hmm-tests
63 + unload_driver
64 + }
65 +
66 + usage()
67 + {
68 + echo -n "Usage: $0"
69 + echo
70 + echo "Example usage:"
71 + echo
72 + echo "# Shows help message"
73 + echo "./${TEST_NAME}.sh"
74 + echo
75 + echo "# Smoke testing"
76 + echo "./${TEST_NAME}.sh smoke"
77 + echo
78 + exit 0
79 + }
80 +
81 + function run_test()
82 + {
83 + if [ $# -eq 0 ]; then
84 + usage
85 + else
86 + if [ "$1" = "smoke" ]; then
87 + run_smoke
88 + else
89 + usage
90 + fi
91 + fi
92 + }
93 +
94 + check_test_requirements
95 + run_test $@
96 +
97 + exit 0
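In use, the smoke test has to run as root: ./test_hmm.sh smoke loads the test_hmm module, looks up the HMM_DMIRROR character-device major in /proc/devices, creates /dev/hmm_dmirror0 and /dev/hmm_dmirror1, runs the hmm-tests binary from the same directory, then unloads the module. On kernels without CONFIG_TEST_HMM=m the script exits with the kselftest SKIP code (4), which run_vmtests reports as [SKIP] rather than a failure.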