Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/privcmd: add IOCTL_PRIVCMD_MMAP_RESOURCE

My recent Xen patch series introduces a new HYPERVISOR_memory_op to
support direct priv-mapping of certain guest resources (such as ioreq
pages, used by emulators) by a tools domain, rather than having to access
such resources via the guest P2M.

This patch adds the necessary infrastructure to the privcmd driver and
Xen MMU code to support direct resource mapping.

NOTE: The adjustment in the MMU code is partially cosmetic. Xen will now
allow a PV tools domain to map guest pages either by GFN or MFN, thus
the term 'mfn' has been swapped for 'pfn' in the lower layers of the
remap code.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>

Authored by Paul Durrant; committed by Juergen Gross.
3ad08765 4bf2cc96

+291 -21
+11
arch/arm/xen/enlighten.c
··· 89 89 } 90 90 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); 91 91 92 + /* Not used by XENFEAT_auto_translated guests. */ 93 + int xen_remap_domain_mfn_array(struct vm_area_struct *vma, 94 + unsigned long addr, 95 + xen_pfn_t *mfn, int nr, 96 + int *err_ptr, pgprot_t prot, 97 + unsigned int domid, struct page **pages) 98 + { 99 + return -ENOSYS; 100 + } 101 + EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); 102 + 92 103 static void xen_read_wallclock(struct timespec64 *ts) 93 104 { 94 105 u32 version;
+43 -17
arch/x86/xen/mmu.c
··· 65 65 #define REMAP_BATCH_SIZE 16 66 66 67 67 struct remap_data { 68 - xen_pfn_t *mfn; 68 + xen_pfn_t *pfn; 69 69 bool contiguous; 70 + bool no_translate; 70 71 pgprot_t prot; 71 72 struct mmu_update *mmu_update; 72 73 }; 73 74 74 - static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, 75 + static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, 75 76 unsigned long addr, void *data) 76 77 { 77 78 struct remap_data *rmd = data; 78 - pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); 79 + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); 79 80 80 - /* If we have a contiguous range, just update the mfn itself, 81 - else update pointer to be "next mfn". */ 81 + /* 82 + * If we have a contiguous range, just update the pfn itself, 83 + * else update pointer to be "next pfn". 84 + */ 82 85 if (rmd->contiguous) 83 - (*rmd->mfn)++; 86 + (*rmd->pfn)++; 84 87 else 85 - rmd->mfn++; 88 + rmd->pfn++; 86 89 87 - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; 90 + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; 91 + rmd->mmu_update->ptr |= rmd->no_translate ? 92 + MMU_PT_UPDATE_NO_TRANSLATE : 93 + MMU_NORMAL_PT_UPDATE; 88 94 rmd->mmu_update->val = pte_val_ma(pte); 89 95 rmd->mmu_update++; 90 96 91 97 return 0; 92 98 } 93 99 94 - static int do_remap_gfn(struct vm_area_struct *vma, 100 + static int do_remap_pfn(struct vm_area_struct *vma, 95 101 unsigned long addr, 96 - xen_pfn_t *gfn, int nr, 102 + xen_pfn_t *pfn, int nr, 97 103 int *err_ptr, pgprot_t prot, 98 - unsigned domid, 104 + unsigned int domid, 105 + bool no_translate, 99 106 struct page **pages) 100 107 { 101 108 int err = 0; ··· 113 106 114 107 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); 115 108 116 - rmd.mfn = gfn; 109 + rmd.pfn = pfn; 117 110 rmd.prot = prot; 118 - /* We use the err_ptr to indicate if there we are doing a contiguous 119 - * mapping or a discontigious mapping. 
*/ 111 + /* 112 + * We use the err_ptr to indicate if there we are doing a contiguous 113 + * mapping or a discontigious mapping. 114 + */ 120 115 rmd.contiguous = !err_ptr; 116 + rmd.no_translate = no_translate; 121 117 122 118 while (nr) { 123 119 int index = 0; ··· 131 121 132 122 rmd.mmu_update = mmu_update; 133 123 err = apply_to_page_range(vma->vm_mm, addr, range, 134 - remap_area_mfn_pte_fn, &rmd); 124 + remap_area_pfn_pte_fn, &rmd); 135 125 if (err) 136 126 goto out; 137 127 ··· 185 175 if (xen_feature(XENFEAT_auto_translated_physmap)) 186 176 return -EOPNOTSUPP; 187 177 188 - return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages); 178 + return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, 179 + pages); 189 180 } 190 181 EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); 191 182 ··· 205 194 * cause of "wrong memory was mapped in". 206 195 */ 207 196 BUG_ON(err_ptr == NULL); 208 - return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages); 197 + return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, 198 + false, pages); 209 199 } 210 200 EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); 201 + 202 + int xen_remap_domain_mfn_array(struct vm_area_struct *vma, 203 + unsigned long addr, 204 + xen_pfn_t *mfn, int nr, 205 + int *err_ptr, pgprot_t prot, 206 + unsigned int domid, struct page **pages) 207 + { 208 + if (xen_feature(XENFEAT_auto_translated_physmap)) 209 + return -EOPNOTSUPP; 210 + 211 + return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, 212 + true, pages); 213 + } 214 + EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); 211 215 212 216 /* Returns: 0 success */ 213 217 int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
+133
drivers/xen/privcmd.c
··· 33 33 #include <xen/xen.h> 34 34 #include <xen/privcmd.h> 35 35 #include <xen/interface/xen.h> 36 + #include <xen/interface/memory.h> 36 37 #include <xen/interface/hvm/dm_op.h> 37 38 #include <xen/features.h> 38 39 #include <xen/page.h> ··· 723 722 return 0; 724 723 } 725 724 725 + struct remap_pfn { 726 + struct mm_struct *mm; 727 + struct page **pages; 728 + pgprot_t prot; 729 + unsigned long i; 730 + }; 731 + 732 + static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr, 733 + void *data) 734 + { 735 + struct remap_pfn *r = data; 736 + struct page *page = r->pages[r->i]; 737 + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); 738 + 739 + set_pte_at(r->mm, addr, ptep, pte); 740 + r->i++; 741 + 742 + return 0; 743 + } 744 + 745 + static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) 746 + { 747 + struct privcmd_data *data = file->private_data; 748 + struct mm_struct *mm = current->mm; 749 + struct vm_area_struct *vma; 750 + struct privcmd_mmap_resource kdata; 751 + xen_pfn_t *pfns = NULL; 752 + struct xen_mem_acquire_resource xdata; 753 + int rc; 754 + 755 + if (copy_from_user(&kdata, udata, sizeof(kdata))) 756 + return -EFAULT; 757 + 758 + /* If restriction is in place, check the domid matches */ 759 + if (data->domid != DOMID_INVALID && data->domid != kdata.dom) 760 + return -EPERM; 761 + 762 + down_write(&mm->mmap_sem); 763 + 764 + vma = find_vma(mm, kdata.addr); 765 + if (!vma || vma->vm_ops != &privcmd_vm_ops) { 766 + rc = -EINVAL; 767 + goto out; 768 + } 769 + 770 + pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL); 771 + if (!pfns) { 772 + rc = -ENOMEM; 773 + goto out; 774 + } 775 + 776 + if (xen_feature(XENFEAT_auto_translated_physmap)) { 777 + unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); 778 + struct page **pages; 779 + unsigned int i; 780 + 781 + rc = alloc_empty_pages(vma, nr); 782 + if (rc < 0) 783 + goto out; 784 + 785 + pages = vma->vm_private_data; 786 + for (i = 0; i < 
kdata.num; i++) { 787 + xen_pfn_t pfn = 788 + page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); 789 + 790 + pfns[i] = pfn + (i % XEN_PFN_PER_PAGE); 791 + } 792 + } else 793 + vma->vm_private_data = PRIV_VMA_LOCKED; 794 + 795 + memset(&xdata, 0, sizeof(xdata)); 796 + xdata.domid = kdata.dom; 797 + xdata.type = kdata.type; 798 + xdata.id = kdata.id; 799 + xdata.frame = kdata.idx; 800 + xdata.nr_frames = kdata.num; 801 + set_xen_guest_handle(xdata.frame_list, pfns); 802 + 803 + xen_preemptible_hcall_begin(); 804 + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); 805 + xen_preemptible_hcall_end(); 806 + 807 + if (rc) 808 + goto out; 809 + 810 + if (xen_feature(XENFEAT_auto_translated_physmap)) { 811 + struct remap_pfn r = { 812 + .mm = vma->vm_mm, 813 + .pages = vma->vm_private_data, 814 + .prot = vma->vm_page_prot, 815 + }; 816 + 817 + rc = apply_to_page_range(r.mm, kdata.addr, 818 + kdata.num << PAGE_SHIFT, 819 + remap_pfn_fn, &r); 820 + } else { 821 + unsigned int domid = 822 + (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? 823 + DOMID_SELF : kdata.dom; 824 + int num; 825 + 826 + num = xen_remap_domain_mfn_array(vma, 827 + kdata.addr & PAGE_MASK, 828 + pfns, kdata.num, (int *)pfns, 829 + vma->vm_page_prot, 830 + domid, 831 + vma->vm_private_data); 832 + if (num < 0) 833 + rc = num; 834 + else if (num != kdata.num) { 835 + unsigned int i; 836 + 837 + for (i = 0; i < num; i++) { 838 + rc = pfns[i]; 839 + if (rc < 0) 840 + break; 841 + } 842 + } else 843 + rc = 0; 844 + } 845 + 846 + out: 847 + up_write(&mm->mmap_sem); 848 + kfree(pfns); 849 + 850 + return rc; 851 + } 852 + 726 853 static long privcmd_ioctl(struct file *file, 727 854 unsigned int cmd, unsigned long data) 728 855 { ··· 880 751 881 752 case IOCTL_PRIVCMD_RESTRICT: 882 753 ret = privcmd_ioctl_restrict(file, udata); 754 + break; 755 + 756 + case IOCTL_PRIVCMD_MMAP_RESOURCE: 757 + ret = privcmd_ioctl_mmap_resource(file, udata); 883 758 break; 884 759 885 760 default:
+11
include/uapi/xen/privcmd.h
··· 89 89 const struct privcmd_dm_op_buf __user *ubufs; 90 90 }; 91 91 92 + struct privcmd_mmap_resource { 93 + domid_t dom; 94 + __u32 type; 95 + __u32 id; 96 + __u32 idx; 97 + __u64 num; 98 + __u64 addr; 99 + }; 100 + 92 101 /* 93 102 * @cmd: IOCTL_PRIVCMD_HYPERCALL 94 103 * @arg: &privcmd_hypercall_t ··· 123 114 _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) 124 115 #define IOCTL_PRIVCMD_RESTRICT \ 125 116 _IOC(_IOC_NONE, 'P', 6, sizeof(domid_t)) 117 + #define IOCTL_PRIVCMD_MMAP_RESOURCE \ 118 + _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource)) 126 119 127 120 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+66
include/xen/interface/memory.h
··· 265 265 }; 266 266 DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); 267 267 268 + /* 269 + * Get the pages for a particular guest resource, so that they can be 270 + * mapped directly by a tools domain. 271 + */ 272 + #define XENMEM_acquire_resource 28 273 + struct xen_mem_acquire_resource { 274 + /* IN - The domain whose resource is to be mapped */ 275 + domid_t domid; 276 + /* IN - the type of resource */ 277 + uint16_t type; 278 + 279 + #define XENMEM_resource_ioreq_server 0 280 + #define XENMEM_resource_grant_table 1 281 + 282 + /* 283 + * IN - a type-specific resource identifier, which must be zero 284 + * unless stated otherwise. 285 + * 286 + * type == XENMEM_resource_ioreq_server -> id == ioreq server id 287 + * type == XENMEM_resource_grant_table -> id defined below 288 + */ 289 + uint32_t id; 290 + 291 + #define XENMEM_resource_grant_table_id_shared 0 292 + #define XENMEM_resource_grant_table_id_status 1 293 + 294 + /* IN/OUT - As an IN parameter number of frames of the resource 295 + * to be mapped. However, if the specified value is 0 and 296 + * frame_list is NULL then this field will be set to the 297 + * maximum value supported by the implementation on return. 298 + */ 299 + uint32_t nr_frames; 300 + /* 301 + * OUT - Must be zero on entry. On return this may contain a bitwise 302 + * OR of the following values. 303 + */ 304 + uint32_t flags; 305 + 306 + /* The resource pages have been assigned to the calling domain */ 307 + #define _XENMEM_rsrc_acq_caller_owned 0 308 + #define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned) 309 + 310 + /* 311 + * IN - the index of the initial frame to be mapped. This parameter 312 + * is ignored if nr_frames is 0. 
313 + */ 314 + uint64_t frame; 315 + 316 + #define XENMEM_resource_ioreq_server_frame_bufioreq 0 317 + #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) 318 + 319 + /* 320 + * IN/OUT - If the tools domain is PV then, upon return, frame_list 321 + * will be populated with the MFNs of the resource. 322 + * If the tools domain is HVM then it is expected that, on 323 + * entry, frame_list will be populated with a list of GFNs 324 + * that will be mapped to the MFNs of the resource. 325 + * If -EIO is returned then the frame_list has only been 326 + * partially mapped and it is up to the caller to unmap all 327 + * the GFNs. 328 + * This parameter may be NULL if nr_frames is 0. 329 + */ 330 + GUEST_HANDLE(xen_pfn_t) frame_list; 331 + }; 332 + DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); 333 + 268 334 #endif /* __XEN_PUBLIC_MEMORY_H__ */
+4 -3
include/xen/interface/xen.h
··· 265 265 * 266 266 * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. 267 267 */ 268 - #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ 269 - #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ 270 - #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ 268 + #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ 269 + #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ 270 + #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ 271 + #define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA. */ 271 272 272 273 /* 273 274 * MMU EXTENDED OPERATIONS
+23 -1
include/xen/xen-ops.h
··· 63 63 struct vm_area_struct; 64 64 65 65 /* 66 - * xen_remap_domain_gfn_array() - map an array of foreign frames 66 + * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn 67 67 * @vma: VMA to map the pages into 68 68 * @addr: Address at which to map the pages 69 69 * @gfn: Array of GFNs to map ··· 85 85 int *err_ptr, pgprot_t prot, 86 86 unsigned domid, 87 87 struct page **pages); 88 + 89 + /* 90 + * xen_remap_domain_mfn_array() - map an array of foreign frames by mfn 91 + * @vma: VMA to map the pages into 92 + * @addr: Address at which to map the pages 93 + * @mfn: Array of MFNs to map 94 + * @nr: Number entries in the MFN array 95 + * @err_ptr: Returns per-MFN error status. 96 + * @prot: page protection mask 97 + * @domid: Domain owning the pages 98 + * @pages: Array of pages if this domain has an auto-translated physmap 99 + * 100 + * @mfn and @err_ptr may point to the same buffer, the MFNs will be 101 + * overwritten by the error codes after they are mapped. 102 + * 103 + * Returns the number of successfully mapped frames, or a -ve error 104 + * code. 105 + */ 106 + int xen_remap_domain_mfn_array(struct vm_area_struct *vma, 107 + unsigned long addr, xen_pfn_t *mfn, int nr, 108 + int *err_ptr, pgprot_t prot, 109 + unsigned int domid, struct page **pages); 88 110 89 111 /* xen_remap_domain_gfn_range() - map a range of foreign frames 90 112 * @vma: VMA to map the pages into