Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen-gntalloc: Userspace grant allocation driver

This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

authored by

Daniel De Graaf and committed by
Konrad Rzeszutek Wilk
dd314058 aab8f11a

+546
+8
drivers/xen/Kconfig
··· 80 80 help 81 81 Allows userspace processes to use grants. 82 82 83 + config XEN_GRANT_DEV_ALLOC 84 + tristate "User-space grant reference allocator driver" 85 + depends on XEN 86 + help 87 + Allows userspace processes to create pages with access granted 88 + to other domains. This can be used to implement frontend drivers 89 + or as part of an inter-domain shared memory channel. 90 + 83 91 config XEN_PLATFORM_PCI 84 92 tristate "xen platform pci device driver" 85 93 depends on XEN_PVHVM && PCI
+2
drivers/xen/Makefile
··· 10 10 obj-$(CONFIG_XEN_BALLOON) += balloon.o 11 11 obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o 12 12 obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o 13 + obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o 13 14 obj-$(CONFIG_XENFS) += xenfs/ 14 15 obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o 15 16 obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o ··· 19 18 20 19 xen-evtchn-y := evtchn.o 21 20 xen-gntdev-y := gntdev.o 21 + xen-gntalloc-y := gntalloc.o 22 22 23 23 xen-platform-pci-y := platform-pci.o
+486
drivers/xen/gntalloc.c
··· 1 + /****************************************************************************** 2 + * gntalloc.c 3 + * 4 + * Device for creating grant references (in user-space) that may be shared 5 + * with other domains. 6 + * 7 + * This program is distributed in the hope that it will be useful, 8 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 + * GNU General Public License for more details. 11 + * 12 + * You should have received a copy of the GNU General Public License 13 + * along with this program; if not, write to the Free Software 14 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 15 + */ 16 + 17 + /* 18 + * This driver exists to allow userspace programs in Linux to allocate kernel 19 + * memory that will later be shared with another domain. Without this device, 20 + * Linux userspace programs cannot create grant references. 21 + * 22 + * How this stuff works: 23 + * X -> granting a page to Y 24 + * Y -> mapping the grant from X 25 + * 26 + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. 27 + * 2. X creates an entry in the grant table that says domid(Y) can access P. 28 + * This is done without a hypercall unless the grant table needs expansion. 29 + * 3. X gives the grant reference identifier, GREF, to Y. 30 + * 4. Y maps the page, either directly into kernel memory for use in a backend 31 + * driver, or via a the gntdev device to map into the address space of an 32 + * application running in Y. This is the first point at which Xen does any 33 + * tracking of the page. 34 + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds 35 + * to the shared page, and can now communicate with Y over the shared page. 36 + * 37 + * 38 + * NOTE TO USERSPACE LIBRARIES: 39 + * The grant allocation and mmap()ing are, naturally, two separate operations. 
40 + * You set up the sharing by calling the create ioctl() and then the mmap(). 41 + * Teardown requires munmap() and either close() or ioctl(). 42 + * 43 + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant 44 + * reference, this device can be used to consume kernel memory by leaving grant 45 + * references mapped by another domain when an application exits. Therefore, 46 + * there is a global limit on the number of pages that can be allocated. When 47 + * all references to the page are unmapped, it will be freed during the next 48 + * grant operation. 49 + */ 50 + 51 + #include <linux/atomic.h> 52 + #include <linux/module.h> 53 + #include <linux/miscdevice.h> 54 + #include <linux/kernel.h> 55 + #include <linux/init.h> 56 + #include <linux/slab.h> 57 + #include <linux/fs.h> 58 + #include <linux/device.h> 59 + #include <linux/mm.h> 60 + #include <linux/uaccess.h> 61 + #include <linux/types.h> 62 + #include <linux/list.h> 63 + 64 + #include <xen/xen.h> 65 + #include <xen/page.h> 66 + #include <xen/grant_table.h> 67 + #include <xen/gntalloc.h> 68 + 69 + static int limit = 1024; 70 + module_param(limit, int, 0644); 71 + MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " 72 + "the gntalloc device"); 73 + 74 + static LIST_HEAD(gref_list); 75 + static DEFINE_SPINLOCK(gref_lock); 76 + static int gref_size; 77 + 78 + /* Metadata on a grant reference. 
*/ 79 + struct gntalloc_gref { 80 + struct list_head next_gref; /* list entry gref_list */ 81 + struct list_head next_file; /* list entry file->list, if open */ 82 + struct page *page; /* The shared page */ 83 + uint64_t file_index; /* File offset for mmap() */ 84 + unsigned int users; /* Use count - when zero, waiting on Xen */ 85 + grant_ref_t gref_id; /* The grant reference number */ 86 + }; 87 + 88 + struct gntalloc_file_private_data { 89 + struct list_head list; 90 + uint64_t index; 91 + }; 92 + 93 + static void __del_gref(struct gntalloc_gref *gref); 94 + 95 + static void do_cleanup(void) 96 + { 97 + struct gntalloc_gref *gref, *n; 98 + list_for_each_entry_safe(gref, n, &gref_list, next_gref) { 99 + if (!gref->users) 100 + __del_gref(gref); 101 + } 102 + } 103 + 104 + static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, 105 + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) 106 + { 107 + int i, rc, readonly; 108 + LIST_HEAD(queue_gref); 109 + LIST_HEAD(queue_file); 110 + struct gntalloc_gref *gref; 111 + 112 + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); 113 + rc = -ENOMEM; 114 + for (i = 0; i < op->count; i++) { 115 + gref = kzalloc(sizeof(*gref), GFP_KERNEL); 116 + if (!gref) 117 + goto undo; 118 + list_add_tail(&gref->next_gref, &queue_gref); 119 + list_add_tail(&gref->next_file, &queue_file); 120 + gref->users = 1; 121 + gref->file_index = op->index + i * PAGE_SIZE; 122 + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); 123 + if (!gref->page) 124 + goto undo; 125 + 126 + /* Grant foreign access to the page. */ 127 + gref->gref_id = gnttab_grant_foreign_access(op->domid, 128 + pfn_to_mfn(page_to_pfn(gref->page)), readonly); 129 + if (gref->gref_id < 0) { 130 + rc = gref->gref_id; 131 + goto undo; 132 + } 133 + gref_ids[i] = gref->gref_id; 134 + } 135 + 136 + /* Add to gref lists. 
*/ 137 + spin_lock(&gref_lock); 138 + list_splice_tail(&queue_gref, &gref_list); 139 + list_splice_tail(&queue_file, &priv->list); 140 + spin_unlock(&gref_lock); 141 + 142 + return 0; 143 + 144 + undo: 145 + spin_lock(&gref_lock); 146 + gref_size -= (op->count - i); 147 + 148 + list_for_each_entry(gref, &queue_file, next_file) { 149 + /* __del_gref does not remove from queue_file */ 150 + __del_gref(gref); 151 + } 152 + 153 + /* It's possible for the target domain to map the just-allocated grant 154 + * references by blindly guessing their IDs; if this is done, then 155 + * __del_gref will leave them in the queue_gref list. They need to be 156 + * added to the global list so that we can free them when they are no 157 + * longer referenced. 158 + */ 159 + if (unlikely(!list_empty(&queue_gref))) 160 + list_splice_tail(&queue_gref, &gref_list); 161 + spin_unlock(&gref_lock); 162 + return rc; 163 + } 164 + 165 + static void __del_gref(struct gntalloc_gref *gref) 166 + { 167 + if (gref->gref_id > 0) { 168 + if (gnttab_query_foreign_access(gref->gref_id)) 169 + return; 170 + 171 + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) 172 + return; 173 + } 174 + 175 + gref_size--; 176 + list_del(&gref->next_gref); 177 + 178 + if (gref->page) 179 + __free_page(gref->page); 180 + 181 + kfree(gref); 182 + } 183 + 184 + /* finds contiguous grant references in a file, returns the first */ 185 + static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, 186 + uint64_t index, uint32_t count) 187 + { 188 + struct gntalloc_gref *rv = NULL, *gref; 189 + list_for_each_entry(gref, &priv->list, next_file) { 190 + if (gref->file_index == index && !rv) 191 + rv = gref; 192 + if (rv) { 193 + if (gref->file_index != index) 194 + return NULL; 195 + index += PAGE_SIZE; 196 + count--; 197 + if (count == 0) 198 + return rv; 199 + } 200 + } 201 + return NULL; 202 + } 203 + 204 + /* 205 + * ------------------------------------- 206 + * File operations. 
207 + * ------------------------------------- 208 + */ 209 + static int gntalloc_open(struct inode *inode, struct file *filp) 210 + { 211 + struct gntalloc_file_private_data *priv; 212 + 213 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 214 + if (!priv) 215 + goto out_nomem; 216 + INIT_LIST_HEAD(&priv->list); 217 + 218 + filp->private_data = priv; 219 + 220 + pr_debug("%s: priv %p\n", __func__, priv); 221 + 222 + return 0; 223 + 224 + out_nomem: 225 + return -ENOMEM; 226 + } 227 + 228 + static int gntalloc_release(struct inode *inode, struct file *filp) 229 + { 230 + struct gntalloc_file_private_data *priv = filp->private_data; 231 + struct gntalloc_gref *gref; 232 + 233 + pr_debug("%s: priv %p\n", __func__, priv); 234 + 235 + spin_lock(&gref_lock); 236 + while (!list_empty(&priv->list)) { 237 + gref = list_entry(priv->list.next, 238 + struct gntalloc_gref, next_file); 239 + list_del(&gref->next_file); 240 + gref->users--; 241 + if (gref->users == 0) 242 + __del_gref(gref); 243 + } 244 + kfree(priv); 245 + spin_unlock(&gref_lock); 246 + 247 + return 0; 248 + } 249 + 250 + static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, 251 + struct ioctl_gntalloc_alloc_gref __user *arg) 252 + { 253 + int rc = 0; 254 + struct ioctl_gntalloc_alloc_gref op; 255 + uint32_t *gref_ids; 256 + 257 + pr_debug("%s: priv %p\n", __func__, priv); 258 + 259 + if (copy_from_user(&op, arg, sizeof(op))) { 260 + rc = -EFAULT; 261 + goto out; 262 + } 263 + 264 + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); 265 + if (!gref_ids) { 266 + rc = -ENOMEM; 267 + goto out; 268 + } 269 + 270 + spin_lock(&gref_lock); 271 + /* Clean up pages that were at zero (local) users but were still mapped 272 + * by remote domains. Since those pages count towards the limit that we 273 + * are about to enforce, removing them here is a good idea. 
274 + */ 275 + do_cleanup(); 276 + if (gref_size + op.count > limit) { 277 + spin_unlock(&gref_lock); 278 + rc = -ENOSPC; 279 + goto out_free; 280 + } 281 + gref_size += op.count; 282 + op.index = priv->index; 283 + priv->index += op.count * PAGE_SIZE; 284 + spin_unlock(&gref_lock); 285 + 286 + rc = add_grefs(&op, gref_ids, priv); 287 + if (rc < 0) 288 + goto out_free; 289 + 290 + /* Once we finish add_grefs, it is unsafe to touch the new reference, 291 + * since it is possible for a concurrent ioctl to remove it (by guessing 292 + * its index). If the userspace application doesn't provide valid memory 293 + * to write the IDs to, then it will need to close the file in order to 294 + * release - which it will do by segfaulting when it tries to access the 295 + * IDs to close them. 296 + */ 297 + if (copy_to_user(arg, &op, sizeof(op))) { 298 + rc = -EFAULT; 299 + goto out_free; 300 + } 301 + if (copy_to_user(arg->gref_ids, gref_ids, 302 + sizeof(gref_ids[0]) * op.count)) { 303 + rc = -EFAULT; 304 + goto out_free; 305 + } 306 + 307 + out_free: 308 + kfree(gref_ids); 309 + out: 310 + return rc; 311 + } 312 + 313 + static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, 314 + void __user *arg) 315 + { 316 + int i, rc = 0; 317 + struct ioctl_gntalloc_dealloc_gref op; 318 + struct gntalloc_gref *gref, *n; 319 + 320 + pr_debug("%s: priv %p\n", __func__, priv); 321 + 322 + if (copy_from_user(&op, arg, sizeof(op))) { 323 + rc = -EFAULT; 324 + goto dealloc_grant_out; 325 + } 326 + 327 + spin_lock(&gref_lock); 328 + gref = find_grefs(priv, op.index, op.count); 329 + if (gref) { 330 + /* Remove from the file list only, and decrease reference count. 331 + * The later call to do_cleanup() will remove from gref_list and 332 + * free the memory if the pages aren't mapped anywhere. 
333 + */ 334 + for (i = 0; i < op.count; i++) { 335 + n = list_entry(gref->next_file.next, 336 + struct gntalloc_gref, next_file); 337 + list_del(&gref->next_file); 338 + gref->users--; 339 + gref = n; 340 + } 341 + } else { 342 + rc = -EINVAL; 343 + } 344 + 345 + do_cleanup(); 346 + 347 + spin_unlock(&gref_lock); 348 + dealloc_grant_out: 349 + return rc; 350 + } 351 + 352 + static long gntalloc_ioctl(struct file *filp, unsigned int cmd, 353 + unsigned long arg) 354 + { 355 + struct gntalloc_file_private_data *priv = filp->private_data; 356 + 357 + switch (cmd) { 358 + case IOCTL_GNTALLOC_ALLOC_GREF: 359 + return gntalloc_ioctl_alloc(priv, (void __user *)arg); 360 + 361 + case IOCTL_GNTALLOC_DEALLOC_GREF: 362 + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); 363 + 364 + default: 365 + return -ENOIOCTLCMD; 366 + } 367 + 368 + return 0; 369 + } 370 + 371 + static void gntalloc_vma_close(struct vm_area_struct *vma) 372 + { 373 + struct gntalloc_gref *gref = vma->vm_private_data; 374 + if (!gref) 375 + return; 376 + 377 + spin_lock(&gref_lock); 378 + gref->users--; 379 + if (gref->users == 0) 380 + __del_gref(gref); 381 + spin_unlock(&gref_lock); 382 + } 383 + 384 + static struct vm_operations_struct gntalloc_vmops = { 385 + .close = gntalloc_vma_close, 386 + }; 387 + 388 + static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) 389 + { 390 + struct gntalloc_file_private_data *priv = filp->private_data; 391 + struct gntalloc_gref *gref; 392 + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 393 + int rv, i; 394 + 395 + pr_debug("%s: priv %p, page %lu+%d\n", __func__, 396 + priv, vma->vm_pgoff, count); 397 + 398 + if (!(vma->vm_flags & VM_SHARED)) { 399 + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); 400 + return -EINVAL; 401 + } 402 + 403 + spin_lock(&gref_lock); 404 + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); 405 + if (gref == NULL) { 406 + rv = -ENOENT; 407 + pr_debug("%s: Could not find grant 
reference", 408 + __func__); 409 + goto out_unlock; 410 + } 411 + 412 + vma->vm_private_data = gref; 413 + 414 + vma->vm_flags |= VM_RESERVED; 415 + vma->vm_flags |= VM_DONTCOPY; 416 + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; 417 + 418 + vma->vm_ops = &gntalloc_vmops; 419 + 420 + for (i = 0; i < count; i++) { 421 + gref->users++; 422 + rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, 423 + gref->page); 424 + if (rv) 425 + goto out_unlock; 426 + 427 + gref = list_entry(gref->next_file.next, 428 + struct gntalloc_gref, next_file); 429 + } 430 + rv = 0; 431 + 432 + out_unlock: 433 + spin_unlock(&gref_lock); 434 + return rv; 435 + } 436 + 437 + static const struct file_operations gntalloc_fops = { 438 + .owner = THIS_MODULE, 439 + .open = gntalloc_open, 440 + .release = gntalloc_release, 441 + .unlocked_ioctl = gntalloc_ioctl, 442 + .mmap = gntalloc_mmap 443 + }; 444 + 445 + /* 446 + * ------------------------------------- 447 + * Module creation/destruction. 448 + * ------------------------------------- 449 + */ 450 + static struct miscdevice gntalloc_miscdev = { 451 + .minor = MISC_DYNAMIC_MINOR, 452 + .name = "xen/gntalloc", 453 + .fops = &gntalloc_fops, 454 + }; 455 + 456 + static int __init gntalloc_init(void) 457 + { 458 + int err; 459 + 460 + if (!xen_domain()) 461 + return -ENODEV; 462 + 463 + err = misc_register(&gntalloc_miscdev); 464 + if (err != 0) { 465 + printk(KERN_ERR "Could not register misc gntalloc device\n"); 466 + return err; 467 + } 468 + 469 + pr_debug("Created grant allocation device at %d,%d\n", 470 + MISC_MAJOR, gntalloc_miscdev.minor); 471 + 472 + return 0; 473 + } 474 + 475 + static void __exit gntalloc_exit(void) 476 + { 477 + misc_deregister(&gntalloc_miscdev); 478 + } 479 + 480 + module_init(gntalloc_init); 481 + module_exit(gntalloc_exit); 482 + 483 + MODULE_LICENSE("GPL"); 484 + MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " 485 + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); 486 + 
MODULE_DESCRIPTION("User-space grant reference allocator driver");
+50
include/xen/gntalloc.h
/******************************************************************************
 * gntalloc.h
 *
 * Interface to /dev/xen/gntalloc.
 *
 * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
 *
 * This file is in the public domain.
 */

#ifndef __LINUX_PUBLIC_GNTALLOC_H__
#define __LINUX_PUBLIC_GNTALLOC_H__

/*
 * IOCTL_GNTALLOC_ALLOC_GREF
 *
 * Allocates new pages and creates one grant reference per page.
 */
#define IOCTL_GNTALLOC_ALLOC_GREF \
_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
struct ioctl_gntalloc_alloc_gref {
	/* --- IN parameters --- */

	/* ID of the domain that is given access to the grants. */
	uint16_t domid;
	/* Flags for the grants (GNTALLOC_FLAG_*). */
	uint16_t flags;
	/* Number of pages to allocate and grant. */
	uint32_t count;

	/* --- OUT parameters --- */

	/* File offset to pass to a subsequent mmap() of the device. */
	uint64_t index;
	/*
	 * Grant references of the newly created grants, one per page.
	 * Variable size: the caller must provide room for count entries.
	 */
	uint32_t gref_ids[1];
};

/* Grant write access; without this flag the grant is read-only. */
#define GNTALLOC_FLAG_WRITABLE 1

/*
 * IOCTL_GNTALLOC_DEALLOC_GREF
 *
 * Deallocates grant references, allowing the associated pages to be freed
 * once no other domain is using them.
 */
#define IOCTL_GNTALLOC_DEALLOC_GREF \
_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
struct ioctl_gntalloc_dealloc_gref {
	/* --- IN parameters --- */

	/* File offset (index) returned by the allocation ioctl. */
	uint64_t index;
	/* Number of consecutive references to release. */
	uint32_t count;
};
#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */