Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/privcmd: Add IOCTL_PRIVCMD_DM_OP

Recently a new dm_op[1] hypercall was added to Xen to provide a mechanism
for restricting device emulators (such as QEMU) to a limited set of
hypervisor operations, and for auditing those operations in the kernel
of the domain in which they run.

This patch adds IOCTL_PRIVCMD_DM_OP as a gateway for __HYPERVISOR_dm_op.

NOTE: There is no requirement for user-space code to bounce data through
locked memory buffers (as with IOCTL_PRIVCMD_HYPERCALL) since
privcmd has enough information to lock the original buffers
directly.

[1] http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=524a98c2

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Authored by Paul Durrant and committed by Boris Ostrovsky
ab520be8 dc9eab6f

+196
+1
arch/arm/xen/enlighten.c
··· 457 457 EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); 458 458 EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); 459 459 EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); 460 + EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); 460 461 EXPORT_SYMBOL_GPL(privcmd_call);
+1
arch/arm/xen/hypercall.S
··· 92 92 HYPERCALL1(platform_op_raw); 93 93 HYPERCALL2(multicall); 94 94 HYPERCALL2(vm_assist); 95 + HYPERCALL3(dm_op); 95 96 96 97 ENTRY(privcmd_call) 97 98 stmdb sp!, {r4}
+1
arch/arm64/xen/hypercall.S
··· 84 84 HYPERCALL1(platform_op_raw); 85 85 HYPERCALL2(multicall); 86 86 HYPERCALL2(vm_assist); 87 + HYPERCALL3(dm_op); 87 88 88 89 ENTRY(privcmd_call) 89 90 mov x16, x0
+7
arch/x86/include/asm/xen/hypercall.h
··· 472 472 return _hypercall2(int, xenpmu_op, op, arg); 473 473 } 474 474 475 + static inline int 476 + HYPERVISOR_dm_op( 477 + domid_t dom, unsigned int nr_bufs, void *bufs) 478 + { 479 + return _hypercall3(int, dm_op, dom, nr_bufs, bufs); 480 + } 481 + 475 482 static inline void 476 483 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) 477 484 {
+139
drivers/xen/privcmd.c
··· 22 22 #include <linux/pagemap.h> 23 23 #include <linux/seq_file.h> 24 24 #include <linux/miscdevice.h> 25 + #include <linux/moduleparam.h> 25 26 26 27 #include <asm/pgalloc.h> 27 28 #include <asm/pgtable.h> ··· 33 32 #include <xen/xen.h> 34 33 #include <xen/privcmd.h> 35 34 #include <xen/interface/xen.h> 35 + #include <xen/interface/hvm/dm_op.h> 36 36 #include <xen/features.h> 37 37 #include <xen/page.h> 38 38 #include <xen/xen-ops.h> ··· 44 42 MODULE_LICENSE("GPL"); 45 43 46 44 #define PRIV_VMA_LOCKED ((void *)1) 45 + 46 + static unsigned int privcmd_dm_op_max_num = 16; 47 + module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644); 48 + MODULE_PARM_DESC(dm_op_max_nr_bufs, 49 + "Maximum number of buffers per dm_op hypercall"); 50 + 51 + static unsigned int privcmd_dm_op_buf_max_size = 4096; 52 + module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, 53 + 0644); 54 + MODULE_PARM_DESC(dm_op_buf_max_size, 55 + "Maximum size of a dm_op hypercall buffer"); 47 56 48 57 static int privcmd_vma_range_is_mapped( 49 58 struct vm_area_struct *vma, ··· 561 548 goto out; 562 549 } 563 550 551 + static int lock_pages( 552 + struct privcmd_dm_op_buf kbufs[], unsigned int num, 553 + struct page *pages[], unsigned int nr_pages) 554 + { 555 + unsigned int i; 556 + 557 + for (i = 0; i < num; i++) { 558 + unsigned int requested; 559 + int pinned; 560 + 561 + requested = DIV_ROUND_UP( 562 + offset_in_page(kbufs[i].uptr) + kbufs[i].size, 563 + PAGE_SIZE); 564 + if (requested > nr_pages) 565 + return -ENOSPC; 566 + 567 + pinned = get_user_pages_fast( 568 + (unsigned long) kbufs[i].uptr, 569 + requested, FOLL_WRITE, pages); 570 + if (pinned < 0) 571 + return pinned; 572 + 573 + nr_pages -= pinned; 574 + pages += pinned; 575 + } 576 + 577 + return 0; 578 + } 579 + 580 + static void unlock_pages(struct page *pages[], unsigned int nr_pages) 581 + { 582 + unsigned int i; 583 + 584 + if (!pages) 585 + return; 586 + 587 + for (i = 0; i < nr_pages; i++) { 588 
+ if (pages[i]) 589 + put_page(pages[i]); 590 + } 591 + } 592 + 593 + static long privcmd_ioctl_dm_op(void __user *udata) 594 + { 595 + struct privcmd_dm_op kdata; 596 + struct privcmd_dm_op_buf *kbufs; 597 + unsigned int nr_pages = 0; 598 + struct page **pages = NULL; 599 + struct xen_dm_op_buf *xbufs = NULL; 600 + unsigned int i; 601 + long rc; 602 + 603 + if (copy_from_user(&kdata, udata, sizeof(kdata))) 604 + return -EFAULT; 605 + 606 + if (kdata.num == 0) 607 + return 0; 608 + 609 + if (kdata.num > privcmd_dm_op_max_num) 610 + return -E2BIG; 611 + 612 + kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL); 613 + if (!kbufs) 614 + return -ENOMEM; 615 + 616 + if (copy_from_user(kbufs, kdata.ubufs, 617 + sizeof(*kbufs) * kdata.num)) { 618 + rc = -EFAULT; 619 + goto out; 620 + } 621 + 622 + for (i = 0; i < kdata.num; i++) { 623 + if (kbufs[i].size > privcmd_dm_op_buf_max_size) { 624 + rc = -E2BIG; 625 + goto out; 626 + } 627 + 628 + if (!access_ok(VERIFY_WRITE, kbufs[i].uptr, 629 + kbufs[i].size)) { 630 + rc = -EFAULT; 631 + goto out; 632 + } 633 + 634 + nr_pages += DIV_ROUND_UP( 635 + offset_in_page(kbufs[i].uptr) + kbufs[i].size, 636 + PAGE_SIZE); 637 + } 638 + 639 + pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); 640 + if (!pages) { 641 + rc = -ENOMEM; 642 + goto out; 643 + } 644 + 645 + xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL); 646 + if (!xbufs) { 647 + rc = -ENOMEM; 648 + goto out; 649 + } 650 + 651 + rc = lock_pages(kbufs, kdata.num, pages, nr_pages); 652 + if (rc) 653 + goto out; 654 + 655 + for (i = 0; i < kdata.num; i++) { 656 + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); 657 + xbufs[i].size = kbufs[i].size; 658 + } 659 + 660 + xen_preemptible_hcall_begin(); 661 + rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs); 662 + xen_preemptible_hcall_end(); 663 + 664 + out: 665 + unlock_pages(pages, nr_pages); 666 + kfree(xbufs); 667 + kfree(pages); 668 + kfree(kbufs); 669 + 670 + return rc; 671 + } 672 + 564 673 static long 
privcmd_ioctl(struct file *file, 565 674 unsigned int cmd, unsigned long data) 566 675 { ··· 704 569 705 570 case IOCTL_PRIVCMD_MMAPBATCH_V2: 706 571 ret = privcmd_ioctl_mmap_batch(udata, 2); 572 + break; 573 + 574 + case IOCTL_PRIVCMD_DM_OP: 575 + ret = privcmd_ioctl_dm_op(udata); 707 576 break; 708 577 709 578 default:
+13
include/uapi/xen/privcmd.h
··· 77 77 int __user *err; /* array of error codes */ 78 78 }; 79 79 80 + struct privcmd_dm_op_buf { 81 + void __user *uptr; 82 + size_t size; 83 + }; 84 + 85 + struct privcmd_dm_op { 86 + domid_t dom; 87 + __u16 num; 88 + const struct privcmd_dm_op_buf __user *ubufs; 89 + }; 90 + 80 91 /* 81 92 * @cmd: IOCTL_PRIVCMD_HYPERCALL 82 93 * @arg: &privcmd_hypercall_t ··· 109 98 _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) 110 99 #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ 111 100 _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) 101 + #define IOCTL_PRIVCMD_DM_OP \ 102 + _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) 112 103 113 104 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+1
include/xen/arm/hypercall.h
··· 53 53 int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); 54 54 int HYPERVISOR_tmem_op(void *arg); 55 55 int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); 56 + int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, void *bufs); 56 57 int HYPERVISOR_platform_op_raw(void *arg); 57 58 static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) 58 59 {
+32
include/xen/interface/hvm/dm_op.h
··· 1 + /* 2 + * Copyright (c) 2016, Citrix Systems Inc 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a copy 5 + * of this software and associated documentation files (the "Software"), to 6 + * deal in the Software without restriction, including without limitation the 7 + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 + * sell copies of the Software, and to permit persons to whom the Software is 9 + * furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 + * DEALINGS IN THE SOFTWARE. 21 + */ 22 + 23 + #ifndef __XEN_PUBLIC_HVM_DM_OP_H__ 24 + #define __XEN_PUBLIC_HVM_DM_OP_H__ 25 + 26 + struct xen_dm_op_buf { 27 + GUEST_HANDLE(void) h; 28 + xen_ulong_t size; 29 + }; 30 + DEFINE_GUEST_HANDLE_STRUCT(xen_dm_op_buf); 31 + 32 + #endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */
+1
include/xen/interface/xen.h
··· 81 81 #define __HYPERVISOR_tmem_op 38 82 82 #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ 83 83 #define __HYPERVISOR_xenpmu_op 40 84 + #define __HYPERVISOR_dm_op 41 84 85 85 86 /* Architecture-specific hypercall definitions. */ 86 87 #define __HYPERVISOR_arch_0 48