Merge branches 'stable/hvc-console', 'stable/gntalloc.v6' and 'stable/balloon' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

* 'stable/hvc-console' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen/hvc: Disable probe_irq_on/off from poking the hvc-console IRQ line.

* 'stable/gntalloc.v6' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen: gntdev: fix build warning
xen/p2m/m2p/gnttab: do not add failed grant maps to m2p override
xen-gntdev: Add cast to pointer
xen-gntdev: Fix incorrect use of zero handle
xen: change xen/[gntdev/gntalloc] to default m
xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
xen-gntdev: Avoid double-mapping memory
xen-gntdev: Avoid unmapping ranges twice
xen-gntdev: Use map->vma for checking map validity
xen-gntdev: Fix unmap notify on PV domains
xen-gntdev: Fix memory leak when mmap fails
xen/gntalloc,gntdev: Add unmap notify ioctl
xen-gntalloc: Userspace grant allocation driver
xen-gntdev: Support mapping in HVM domains
xen-gntdev: Add reference counting to maps
xen-gntdev: Use find_vma rather than iterating our vma list manually
xen-gntdev: Change page limit to be global instead of per-open

* 'stable/balloon' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (24 commits)
xen-gntdev: Use ballooned pages for grant mappings
xen-balloon: Add interface to retrieve ballooned pages
xen-balloon: Move core balloon functionality out of module
xen/balloon: Remove pr_info's and don't alter retry_count
xen/balloon: Protect against CPU exhaust by event/x process
xen/balloon: Migration from mod_timer() to schedule_delayed_work()
xen/balloon: Removal of driver_pages

+1328 -389
+1 -2
arch/x86/xen/mmu.c
···
 
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
+ * Also protects non-atomic updates of current_pages and balloon lists.
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
+2
drivers/tty/hvc/hvc_xen.c
···
         }
         if (xencons_irq < 0)
                 xencons_irq = 0; /* NO_IRQ */
+        else
+                set_irq_noprobe(xencons_irq);
 
         hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256);
         if (IS_ERR(hp))
+10
drivers/xen/Kconfig
···
 config XEN_GNTDEV
 	tristate "userspace grant access device driver"
 	depends on XEN
+	default m
 	select MMU_NOTIFIER
 	help
 	  Allows userspace processes to use grants.
+
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	default m
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
 
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
+4 -2
drivers/xen/Makefile
···
-obj-y += grant-table.o features.o events.o manage.o
+obj-y += grant-table.o features.o events.o manage.o balloon.o
 obj-y += xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
···
 obj-$(CONFIG_BLOCK) += biomerge.o
 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
-obj-$(CONFIG_XEN_BALLOON) += balloon.o
+obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
 obj-$(CONFIG_XENFS) += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
···
 
 xen-evtchn-y := evtchn.o
 xen-gntdev-y := gntdev.o
+xen-gntalloc-y := gntalloc.o
 
 xen-platform-pci-y := platform-pci.o
+126 -235
drivers/xen/balloon.c
··· 1 1 /****************************************************************************** 2 - * balloon.c 3 - * 4 2 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 3 * 6 4 * Copyright (c) 2003, B Dragovic ··· 31 33 */ 32 34 33 35 #include <linux/kernel.h> 34 - #include <linux/module.h> 35 36 #include <linux/sched.h> 36 37 #include <linux/errno.h> 37 38 #include <linux/mm.h> ··· 39 42 #include <linux/highmem.h> 40 43 #include <linux/mutex.h> 41 44 #include <linux/list.h> 42 - #include <linux/sysdev.h> 43 45 #include <linux/gfp.h> 44 46 45 47 #include <asm/page.h> 46 48 #include <asm/pgalloc.h> 47 49 #include <asm/pgtable.h> 48 - #include <asm/uaccess.h> 49 50 #include <asm/tlb.h> 50 51 #include <asm/e820.h> 51 52 ··· 53 58 #include <xen/xen.h> 54 59 #include <xen/interface/xen.h> 55 60 #include <xen/interface/memory.h> 56 - #include <xen/xenbus.h> 61 + #include <xen/balloon.h> 57 62 #include <xen/features.h> 58 63 #include <xen/page.h> 59 64 60 - #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 65 + /* 66 + * balloon_process() state: 67 + * 68 + * BP_DONE: done or nothing to do, 69 + * BP_EAGAIN: error, go to sleep, 70 + * BP_ECANCELED: error, balloon operation canceled. 71 + */ 61 72 62 - #define BALLOON_CLASS_NAME "xen_memory" 63 - 64 - struct balloon_stats { 65 - /* We aim for 'current allocation' == 'target allocation'. */ 66 - unsigned long current_pages; 67 - unsigned long target_pages; 68 - /* 69 - * Drivers may alter the memory reservation independently, but they 70 - * must inform the balloon driver so we avoid hitting the hard limit. 71 - */ 72 - unsigned long driver_pages; 73 - /* Number of pages in high- and low-memory balloons. */ 74 - unsigned long balloon_low; 75 - unsigned long balloon_high; 73 + enum bp_state { 74 + BP_DONE, 75 + BP_EAGAIN, 76 + BP_ECANCELED 76 77 }; 78 + 77 79 78 80 static DEFINE_MUTEX(balloon_mutex); 79 81 80 - static struct sys_device balloon_sysdev; 81 - 82 - static int register_balloon(struct sys_device *sysdev); 83 - 84 - static struct balloon_stats balloon_stats; 82 + struct balloon_stats balloon_stats; 83 + EXPORT_SYMBOL_GPL(balloon_stats); 85 84 86 85 /* We increase/decrease in batches which fit in a page */ 87 86 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; ··· 93 104 94 105 /* Main work function, always executed in process context. */ 95 106 static void balloon_process(struct work_struct *work); 96 - static DECLARE_WORK(balloon_worker, balloon_process); 97 - static struct timer_list balloon_timer; 107 + static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); 98 108 99 109 /* When ballooning out (allocating memory to return to Xen) we don't really 100 110 want the kernel to try too hard since that can trigger the oom killer. */ ··· 128 140 } 129 141 130 142 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ 131 - static struct page *balloon_retrieve(void) 143 + static struct page *balloon_retrieve(bool prefer_highmem) 132 144 { 133 145 struct page *page; 134 146 135 147 if (list_empty(&ballooned_pages)) 136 148 return NULL; 137 149 138 - page = list_entry(ballooned_pages.next, struct page, lru); 150 + if (prefer_highmem) 151 + page = list_entry(ballooned_pages.prev, struct page, lru); 152 + else 153 + page = list_entry(ballooned_pages.next, struct page, lru); 139 154 list_del(&page->lru); 140 155 141 156 if (PageHighMem(page)) { ··· 168 177 return list_entry(next, struct page, lru); 169 178 } 170 179 171 - static void balloon_alarm(unsigned long unused) 180 + static enum bp_state update_schedule(enum bp_state state) 172 181 { 173 - schedule_work(&balloon_worker); 182 + if (state == BP_DONE) { 183 + balloon_stats.schedule_delay = 1; 184 + balloon_stats.retry_count = 1; 185 + return BP_DONE; 186 + } 187 + 188 + ++balloon_stats.retry_count; 189 + 190 + if (balloon_stats.max_retry_count != RETRY_UNLIMITED && 191 + balloon_stats.retry_count > balloon_stats.max_retry_count) { 192 + balloon_stats.schedule_delay = 1; 193 + balloon_stats.retry_count = 1; 194 + return BP_ECANCELED; 195 + } 196 + 197 + balloon_stats.schedule_delay <<= 1; 198 + 199 + if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) 200 + balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; 201 + 202 + return BP_EAGAIN; 174 203 } 175 204 176 205 static unsigned long current_target(void) ··· 205 194 return target; 206 195 } 207 196 208 - static int increase_reservation(unsigned long nr_pages) 197 + static enum bp_state increase_reservation(unsigned long nr_pages) 209 198 { 199 + int rc; 210 200 unsigned long pfn, i; 211 201 struct page *page; 212 - long rc; 213 202 struct xen_memory_reservation reservation = { 214 203 .address_bits = 0, 215 204 .extent_order = 0, ··· 221 210 222 211 page = balloon_first_page(); 223 212 for (i = 0; i < nr_pages; i++) { 224 - BUG_ON(page == NULL); 213 + if (!page) { 214 + nr_pages = i; 215 + break; 216 + } 225 217 frame_list[i] = page_to_pfn(page); 226 218 page = balloon_next_page(page); 227 219 } ··· 232 218 set_xen_guest_handle(reservation.extent_start, frame_list); 233 219 reservation.nr_extents = nr_pages; 234 220 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); 235 - if (rc < 0) 236 - goto out; 221 + if (rc <= 0) 222 + return BP_EAGAIN; 237 223 238 224 for (i = 0; i < rc; i++) { 239 - page = balloon_retrieve(); 225 + page = balloon_retrieve(false); 240 226 BUG_ON(page == NULL); 241 227 242 228 pfn = page_to_pfn(page); ··· 263 249 264 250 balloon_stats.current_pages += rc; 265 251 266 - out: 267 - return rc < 0 ? 
rc : rc != nr_pages; 252 + return BP_DONE; 268 253 } 269 254 270 - static int decrease_reservation(unsigned long nr_pages) 255 + static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) 271 256 { 257 + enum bp_state state = BP_DONE; 272 258 unsigned long pfn, i; 273 259 struct page *page; 274 - int need_sleep = 0; 275 260 int ret; 276 261 struct xen_memory_reservation reservation = { 277 262 .address_bits = 0, ··· 282 269 nr_pages = ARRAY_SIZE(frame_list); 283 270 284 271 for (i = 0; i < nr_pages; i++) { 285 - if ((page = alloc_page(GFP_BALLOON)) == NULL) { 272 + if ((page = alloc_page(gfp)) == NULL) { 286 273 nr_pages = i; 287 - need_sleep = 1; 274 + state = BP_EAGAIN; 288 275 break; 289 276 } 290 277 ··· 320 307 321 308 balloon_stats.current_pages -= nr_pages; 322 309 323 - return need_sleep; 310 + return state; 324 311 } 325 312 326 313 /* ··· 331 318 */ 332 319 static void balloon_process(struct work_struct *work) 333 320 { 334 - int need_sleep = 0; 321 + enum bp_state state = BP_DONE; 335 322 long credit; 336 323 337 324 mutex_lock(&balloon_mutex); 338 325 339 326 do { 340 327 credit = current_target() - balloon_stats.current_pages; 328 + 341 329 if (credit > 0) 342 - need_sleep = (increase_reservation(credit) != 0); 330 + state = increase_reservation(credit); 331 + 343 332 if (credit < 0) 344 - need_sleep = (decrease_reservation(-credit) != 0); 333 + state = decrease_reservation(-credit, GFP_BALLOON); 334 + 335 + state = update_schedule(state); 345 336 346 337 #ifndef CONFIG_PREEMPT 347 338 if (need_resched()) 348 339 schedule(); 349 340 #endif 350 - } while ((credit != 0) && !need_sleep); 341 + } while (credit && state == BP_DONE); 351 342 352 343 /* Schedule more work if there is some still to be done. */ 353 - if (current_target() != balloon_stats.current_pages) 354 - mod_timer(&balloon_timer, jiffies + HZ); 344 + if (state == BP_EAGAIN) 345 + schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); 355 346 356 347 mutex_unlock(&balloon_mutex); 357 348 } 358 349 359 350 /* Resets the Xen limit, sets new target, and kicks off processing. */ 360 - static void balloon_set_new_target(unsigned long target) 351 + void balloon_set_new_target(unsigned long target) 361 352 { 362 353 /* No need for lock. Not read-modify-write updates. 
*/ 363 354 balloon_stats.target_pages = target; 364 - schedule_work(&balloon_worker); 355 + schedule_delayed_work(&balloon_worker, 0); 365 356 } 357 + EXPORT_SYMBOL_GPL(balloon_set_new_target); 366 358 367 - static struct xenbus_watch target_watch = 359 + /** 360 + * alloc_xenballooned_pages - get pages that have been ballooned out 361 + * @nr_pages: Number of pages to get 362 + * @pages: pages returned 363 + * @return 0 on success, error otherwise 364 + */ 365 + int alloc_xenballooned_pages(int nr_pages, struct page** pages) 368 366 { 369 - .node = "memory/target" 370 - }; 367 + int pgno = 0; 368 + struct page* page; 369 + mutex_lock(&balloon_mutex); 370 + while (pgno < nr_pages) { 371 + page = balloon_retrieve(true); 372 + if (page) { 373 + pages[pgno++] = page; 374 + } else { 375 + enum bp_state st; 376 + st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); 377 + if (st != BP_DONE) 378 + goto out_undo; 379 + } 380 + } 381 + mutex_unlock(&balloon_mutex); 382 + return 0; 383 + out_undo: 384 + while (pgno) 385 + balloon_append(pages[--pgno]); 386 + /* Free the memory back to the kernel soon */ 387 + schedule_delayed_work(&balloon_worker, 0); 388 + mutex_unlock(&balloon_mutex); 389 + return -ENOMEM; 390 + } 391 + EXPORT_SYMBOL(alloc_xenballooned_pages); 371 392 372 - /* React to a change in the target key */ 373 - static void watch_target(struct xenbus_watch *watch, 374 - const char **vec, unsigned int len) 393 + /** 394 + * free_xenballooned_pages - return pages retrieved with get_ballooned_pages 395 + * @nr_pages: Number of pages 396 + * @pages: pages to return 397 + */ 398 + void free_xenballooned_pages(int nr_pages, struct page** pages) 375 399 { 376 - unsigned long long new_target; 377 - int err; 400 + int i; 378 401 379 - err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); 380 - if (err != 1) { 381 - /* This is ok (for domain0 at least) - so just return */ 382 - return; 402 + mutex_lock(&balloon_mutex); 403 + 404 + for (i = 0; i < nr_pages; i++) { 405 + if (pages[i]) 406 + balloon_append(pages[i]); 383 407 } 384 408 385 - /* The given memory/target value is in KiB, so it needs converting to 386 - * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 387 - */ 388 - balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 409 + /* The balloon may be too large now. Shrink it if needed. 
*/ 410 + if (current_target() != balloon_stats.current_pages) 411 + schedule_delayed_work(&balloon_worker, 0); 412 + 413 + mutex_unlock(&balloon_mutex); 389 414 } 390 - 391 - static int balloon_init_watcher(struct notifier_block *notifier, 392 - unsigned long event, 393 - void *data) 394 - { 395 - int err; 396 - 397 - err = register_xenbus_watch(&target_watch); 398 - if (err) 399 - printk(KERN_ERR "Failed to set balloon watcher\n"); 400 - 401 - return NOTIFY_DONE; 402 - } 403 - 404 - static struct notifier_block xenstore_notifier; 415 + EXPORT_SYMBOL(free_xenballooned_pages); 405 416 406 417 static int __init balloon_init(void) 407 418 { ··· 435 398 if (!xen_domain()) 436 399 return -ENODEV; 437 400 438 - pr_info("xen_balloon: Initialising balloon driver.\n"); 401 + pr_info("xen/balloon: Initialising balloon driver.\n"); 439 402 440 403 if (xen_pv_domain()) 441 404 nr_pages = xen_start_info->nr_pages; ··· 445 408 balloon_stats.target_pages = balloon_stats.current_pages; 446 409 balloon_stats.balloon_low = 0; 447 410 balloon_stats.balloon_high = 0; 448 - balloon_stats.driver_pages = 0UL; 449 411 450 - init_timer(&balloon_timer); 451 - balloon_timer.data = 0; 452 - balloon_timer.function = balloon_alarm; 453 - 454 - register_balloon(&balloon_sysdev); 412 + balloon_stats.schedule_delay = 1; 413 + balloon_stats.max_schedule_delay = 32; 414 + balloon_stats.retry_count = 1; 415 + balloon_stats.max_retry_count = RETRY_UNLIMITED; 455 416 456 417 /* 457 418 * Initialise the balloon with excess memory space. We need ··· 471 436 __balloon_append(page); 472 437 } 473 438 474 - target_watch.callback = watch_target; 475 - xenstore_notifier.notifier_call = balloon_init_watcher; 476 - 477 - register_xenstore_notifier(&xenstore_notifier); 478 - 479 439 return 0; 480 440 } 481 441 482 442 subsys_initcall(balloon_init); 483 - 484 - static void balloon_exit(void) 485 - { 486 - /* XXX - release balloon here */ 487 - return; 488 - } 489 - 490 - module_exit(balloon_exit); 491 - 492 - #define BALLOON_SHOW(name, format, args...) 
\ 493 - static ssize_t show_##name(struct sys_device *dev, \ 494 - struct sysdev_attribute *attr, \ 495 - char *buf) \ 496 - { \ 497 - return sprintf(buf, format, ##args); \ 498 - } \ 499 - static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 500 - 501 - BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 502 - BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); 503 - BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); 504 - BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); 505 - 506 - static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, 507 - char *buf) 508 - { 509 - return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 510 - } 511 - 512 - static ssize_t store_target_kb(struct sys_device *dev, 513 - struct sysdev_attribute *attr, 514 - const char *buf, 515 - size_t count) 516 - { 517 - char *endchar; 518 - unsigned long long target_bytes; 519 - 520 - if (!capable(CAP_SYS_ADMIN)) 521 - return -EPERM; 522 - 523 - target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; 524 - 525 - balloon_set_new_target(target_bytes >> PAGE_SHIFT); 526 - 527 - return count; 528 - } 529 - 530 - static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, 531 - show_target_kb, store_target_kb); 532 - 533 - 534 - static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, 535 - char *buf) 536 - { 537 - return sprintf(buf, "%llu\n", 538 - (unsigned long long)balloon_stats.target_pages 539 - << PAGE_SHIFT); 540 - } 541 - 542 - static ssize_t store_target(struct sys_device *dev, 543 - struct sysdev_attribute *attr, 544 - const char *buf, 545 - size_t count) 546 - { 547 - char *endchar; 548 - unsigned long long target_bytes; 549 - 550 - if (!capable(CAP_SYS_ADMIN)) 551 - return -EPERM; 552 - 553 - target_bytes = memparse(buf, &endchar); 554 - 555 - balloon_set_new_target(target_bytes >> PAGE_SHIFT); 556 - 557 - return count; 558 - } 559 - 560 - static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, 561 - show_target, store_target); 562 - 563 - 564 - static struct sysdev_attribute *balloon_attrs[] = { 565 - &attr_target_kb, 566 - &attr_target, 567 - }; 568 - 569 - static struct attribute *balloon_info_attrs[] = { 570 - &attr_current_kb.attr, 571 - &attr_low_kb.attr, 572 - &attr_high_kb.attr, 573 - &attr_driver_kb.attr, 574 - NULL 575 - }; 576 - 577 - static struct attribute_group balloon_info_group = { 578 - .name = "info", 579 - .attrs = balloon_info_attrs, 580 - }; 581 - 582 - static struct sysdev_class balloon_sysdev_class = { 583 - .name = BALLOON_CLASS_NAME, 584 - }; 585 - 586 - static int register_balloon(struct sys_device *sysdev) 587 - { 588 - int i, error; 589 - 590 - error = sysdev_class_register(&balloon_sysdev_class); 591 - if (error) 592 - return error; 593 - 594 - sysdev->id = 0; 595 - sysdev->cls = &balloon_sysdev_class; 596 - 597 - error = sysdev_register(sysdev); 598 - if (error) { 599 - sysdev_class_unregister(&balloon_sysdev_class); 600 - return error; 601 - } 602 - 603 - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { 604 - error = sysdev_create_file(sysdev, balloon_attrs[i]); 605 - if (error) 606 - goto fail; 607 - } 608 - 609 - error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); 610 - if (error) 611 - goto fail; 612 - 613 - return 0; 614 - 615 - fail: 616 - while (--i >= 0) 617 - sysdev_remove_file(sysdev, balloon_attrs[i]); 618 - sysdev_unregister(sysdev); 619 - sysdev_class_unregister(&balloon_sysdev_class); 620 - return error; 621 - } 622 443 623 444 
MODULE_LICENSE("GPL");
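The balloon core now exports alloc_xenballooned_pages() and free_xenballooned_pages() (declared in <xen/balloon.h>), which is what lets gntdev back grant mappings with ballooned-out frames instead of freshly allocated memory. Below is a minimal, hypothetical in-kernel sketch of that interface, using only the two helpers visible in the diff above; the surrounding names and boilerplate are assumptions, not part of this series.

#include <linux/slab.h>
#include <xen/balloon.h>

static struct page **borrowed;

/* Borrow 'count' ballooned-out page structs. If the balloon list is empty,
 * the core driver shrinks the reservation further (GFP_HIGHUSER) to get them. */
static int borrow_ballooned_pages(int count)
{
        int rc;

        borrowed = kcalloc(count, sizeof(borrowed[0]), GFP_KERNEL);
        if (!borrowed)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(count, borrowed);
        if (rc)
                kfree(borrowed);
        return rc;
}

/* Hand the pages back; the balloon worker later rebalances toward the
 * current target. */
static void return_ballooned_pages(int count)
{
        free_xenballooned_pages(count, borrowed);
        kfree(borrowed);
}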
+545
drivers/xen/gntalloc.c
··· 1 + /****************************************************************************** 2 + * gntalloc.c 3 + * 4 + * Device for creating grant references (in user-space) that may be shared 5 + * with other domains. 6 + * 7 + * This program is distributed in the hope that it will be useful, 8 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 + * GNU General Public License for more details. 11 + * 12 + * You should have received a copy of the GNU General Public License 13 + * along with this program; if not, write to the Free Software 14 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 15 + */ 16 + 17 + /* 18 + * This driver exists to allow userspace programs in Linux to allocate kernel 19 + * memory that will later be shared with another domain. Without this device, 20 + * Linux userspace programs cannot create grant references. 21 + * 22 + * How this stuff works: 23 + * X -> granting a page to Y 24 + * Y -> mapping the grant from X 25 + * 26 + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. 27 + * 2. X creates an entry in the grant table that says domid(Y) can access P. 28 + * This is done without a hypercall unless the grant table needs expansion. 29 + * 3. X gives the grant reference identifier, GREF, to Y. 30 + * 4. Y maps the page, either directly into kernel memory for use in a backend 31 + * driver, or via a the gntdev device to map into the address space of an 32 + * application running in Y. This is the first point at which Xen does any 33 + * tracking of the page. 34 + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds 35 + * to the shared page, and can now communicate with Y over the shared page. 36 + * 37 + * 38 + * NOTE TO USERSPACE LIBRARIES: 39 + * The grant allocation and mmap()ing are, naturally, two separate operations. 40 + * You set up the sharing by calling the create ioctl() and then the mmap(). 41 + * Teardown requires munmap() and either close() or ioctl(). 42 + * 43 + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant 44 + * reference, this device can be used to consume kernel memory by leaving grant 45 + * references mapped by another domain when an application exits. Therefore, 46 + * there is a global limit on the number of pages that can be allocated. When 47 + * all references to the page are unmapped, it will be freed during the next 48 + * grant operation. 
49 + */ 50 + 51 + #include <linux/atomic.h> 52 + #include <linux/module.h> 53 + #include <linux/miscdevice.h> 54 + #include <linux/kernel.h> 55 + #include <linux/init.h> 56 + #include <linux/slab.h> 57 + #include <linux/fs.h> 58 + #include <linux/device.h> 59 + #include <linux/mm.h> 60 + #include <linux/uaccess.h> 61 + #include <linux/types.h> 62 + #include <linux/list.h> 63 + #include <linux/highmem.h> 64 + 65 + #include <xen/xen.h> 66 + #include <xen/page.h> 67 + #include <xen/grant_table.h> 68 + #include <xen/gntalloc.h> 69 + #include <xen/events.h> 70 + 71 + static int limit = 1024; 72 + module_param(limit, int, 0644); 73 + MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " 74 + "the gntalloc device"); 75 + 76 + static LIST_HEAD(gref_list); 77 + static DEFINE_SPINLOCK(gref_lock); 78 + static int gref_size; 79 + 80 + struct notify_info { 81 + uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */ 82 + uint16_t flags:2; /* Bits 12-13: Unmap notification flags */ 83 + int event; /* Port (event channel) to notify */ 84 + }; 85 + 86 + /* Metadata on a grant reference. */ 87 + struct gntalloc_gref { 88 + struct list_head next_gref; /* list entry gref_list */ 89 + struct list_head next_file; /* list entry file->list, if open */ 90 + struct page *page; /* The shared page */ 91 + uint64_t file_index; /* File offset for mmap() */ 92 + unsigned int users; /* Use count - when zero, waiting on Xen */ 93 + grant_ref_t gref_id; /* The grant reference number */ 94 + struct notify_info notify; /* Unmap notification */ 95 + }; 96 + 97 + struct gntalloc_file_private_data { 98 + struct list_head list; 99 + uint64_t index; 100 + }; 101 + 102 + static void __del_gref(struct gntalloc_gref *gref); 103 + 104 + static void do_cleanup(void) 105 + { 106 + struct gntalloc_gref *gref, *n; 107 + list_for_each_entry_safe(gref, n, &gref_list, next_gref) { 108 + if (!gref->users) 109 + __del_gref(gref); 110 + } 111 + } 112 + 113 + static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, 114 + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) 115 + { 116 + int i, rc, readonly; 117 + LIST_HEAD(queue_gref); 118 + LIST_HEAD(queue_file); 119 + struct gntalloc_gref *gref; 120 + 121 + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); 122 + rc = -ENOMEM; 123 + for (i = 0; i < op->count; i++) { 124 + gref = kzalloc(sizeof(*gref), GFP_KERNEL); 125 + if (!gref) 126 + goto undo; 127 + list_add_tail(&gref->next_gref, &queue_gref); 128 + list_add_tail(&gref->next_file, &queue_file); 129 + gref->users = 1; 130 + gref->file_index = op->index + i * PAGE_SIZE; 131 + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); 132 + if (!gref->page) 133 + goto undo; 134 + 135 + /* Grant foreign access to the page. */ 136 + gref->gref_id = gnttab_grant_foreign_access(op->domid, 137 + pfn_to_mfn(page_to_pfn(gref->page)), readonly); 138 + if (gref->gref_id < 0) { 139 + rc = gref->gref_id; 140 + goto undo; 141 + } 142 + gref_ids[i] = gref->gref_id; 143 + } 144 + 145 + /* Add to gref lists. 
*/ 146 + spin_lock(&gref_lock); 147 + list_splice_tail(&queue_gref, &gref_list); 148 + list_splice_tail(&queue_file, &priv->list); 149 + spin_unlock(&gref_lock); 150 + 151 + return 0; 152 + 153 + undo: 154 + spin_lock(&gref_lock); 155 + gref_size -= (op->count - i); 156 + 157 + list_for_each_entry(gref, &queue_file, next_file) { 158 + /* __del_gref does not remove from queue_file */ 159 + __del_gref(gref); 160 + } 161 + 162 + /* It's possible for the target domain to map the just-allocated grant 163 + * references by blindly guessing their IDs; if this is done, then 164 + * __del_gref will leave them in the queue_gref list. They need to be 165 + * added to the global list so that we can free them when they are no 166 + * longer referenced. 167 + */ 168 + if (unlikely(!list_empty(&queue_gref))) 169 + list_splice_tail(&queue_gref, &gref_list); 170 + spin_unlock(&gref_lock); 171 + return rc; 172 + } 173 + 174 + static void __del_gref(struct gntalloc_gref *gref) 175 + { 176 + if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 177 + uint8_t *tmp = kmap(gref->page); 178 + tmp[gref->notify.pgoff] = 0; 179 + kunmap(gref->page); 180 + } 181 + if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) 182 + notify_remote_via_evtchn(gref->notify.event); 183 + 184 + gref->notify.flags = 0; 185 + 186 + if (gref->gref_id > 0) { 187 + if (gnttab_query_foreign_access(gref->gref_id)) 188 + return; 189 + 190 + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) 191 + return; 192 + } 193 + 194 + gref_size--; 195 + list_del(&gref->next_gref); 196 + 197 + if (gref->page) 198 + __free_page(gref->page); 199 + 200 + kfree(gref); 201 + } 202 + 203 + /* finds contiguous grant references in a file, returns the first */ 204 + static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, 205 + uint64_t index, uint32_t count) 206 + { 207 + struct gntalloc_gref *rv = NULL, *gref; 208 + list_for_each_entry(gref, &priv->list, next_file) { 209 + if (gref->file_index == index && !rv) 210 + rv = gref; 211 + if (rv) { 212 + if (gref->file_index != index) 213 + return NULL; 214 + index += PAGE_SIZE; 215 + count--; 216 + if (count == 0) 217 + return rv; 218 + } 219 + } 220 + return NULL; 221 + } 222 + 223 + /* 224 + * ------------------------------------- 225 + * File operations. 
226 + * ------------------------------------- 227 + */ 228 + static int gntalloc_open(struct inode *inode, struct file *filp) 229 + { 230 + struct gntalloc_file_private_data *priv; 231 + 232 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 233 + if (!priv) 234 + goto out_nomem; 235 + INIT_LIST_HEAD(&priv->list); 236 + 237 + filp->private_data = priv; 238 + 239 + pr_debug("%s: priv %p\n", __func__, priv); 240 + 241 + return 0; 242 + 243 + out_nomem: 244 + return -ENOMEM; 245 + } 246 + 247 + static int gntalloc_release(struct inode *inode, struct file *filp) 248 + { 249 + struct gntalloc_file_private_data *priv = filp->private_data; 250 + struct gntalloc_gref *gref; 251 + 252 + pr_debug("%s: priv %p\n", __func__, priv); 253 + 254 + spin_lock(&gref_lock); 255 + while (!list_empty(&priv->list)) { 256 + gref = list_entry(priv->list.next, 257 + struct gntalloc_gref, next_file); 258 + list_del(&gref->next_file); 259 + gref->users--; 260 + if (gref->users == 0) 261 + __del_gref(gref); 262 + } 263 + kfree(priv); 264 + spin_unlock(&gref_lock); 265 + 266 + return 0; 267 + } 268 + 269 + static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, 270 + struct ioctl_gntalloc_alloc_gref __user *arg) 271 + { 272 + int rc = 0; 273 + struct ioctl_gntalloc_alloc_gref op; 274 + uint32_t *gref_ids; 275 + 276 + pr_debug("%s: priv %p\n", __func__, priv); 277 + 278 + if (copy_from_user(&op, arg, sizeof(op))) { 279 + rc = -EFAULT; 280 + goto out; 281 + } 282 + 283 + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); 284 + if (!gref_ids) { 285 + rc = -ENOMEM; 286 + goto out; 287 + } 288 + 289 + spin_lock(&gref_lock); 290 + /* Clean up pages that were at zero (local) users but were still mapped 291 + * by remote domains. Since those pages count towards the limit that we 292 + * are about to enforce, removing them here is a good idea. 293 + */ 294 + do_cleanup(); 295 + if (gref_size + op.count > limit) { 296 + spin_unlock(&gref_lock); 297 + rc = -ENOSPC; 298 + goto out_free; 299 + } 300 + gref_size += op.count; 301 + op.index = priv->index; 302 + priv->index += op.count * PAGE_SIZE; 303 + spin_unlock(&gref_lock); 304 + 305 + rc = add_grefs(&op, gref_ids, priv); 306 + if (rc < 0) 307 + goto out_free; 308 + 309 + /* Once we finish add_grefs, it is unsafe to touch the new reference, 310 + * since it is possible for a concurrent ioctl to remove it (by guessing 311 + * its index). If the userspace application doesn't provide valid memory 312 + * to write the IDs to, then it will need to close the file in order to 313 + * release - which it will do by segfaulting when it tries to access the 314 + * IDs to close them. 315 + */ 316 + if (copy_to_user(arg, &op, sizeof(op))) { 317 + rc = -EFAULT; 318 + goto out_free; 319 + } 320 + if (copy_to_user(arg->gref_ids, gref_ids, 321 + sizeof(gref_ids[0]) * op.count)) { 322 + rc = -EFAULT; 323 + goto out_free; 324 + } 325 + 326 + out_free: 327 + kfree(gref_ids); 328 + out: 329 + return rc; 330 + } 331 + 332 + static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, 333 + void __user *arg) 334 + { 335 + int i, rc = 0; 336 + struct ioctl_gntalloc_dealloc_gref op; 337 + struct gntalloc_gref *gref, *n; 338 + 339 + pr_debug("%s: priv %p\n", __func__, priv); 340 + 341 + if (copy_from_user(&op, arg, sizeof(op))) { 342 + rc = -EFAULT; 343 + goto dealloc_grant_out; 344 + } 345 + 346 + spin_lock(&gref_lock); 347 + gref = find_grefs(priv, op.index, op.count); 348 + if (gref) { 349 + /* Remove from the file list only, and decrease reference count. 
350 + * The later call to do_cleanup() will remove from gref_list and 351 + * free the memory if the pages aren't mapped anywhere. 352 + */ 353 + for (i = 0; i < op.count; i++) { 354 + n = list_entry(gref->next_file.next, 355 + struct gntalloc_gref, next_file); 356 + list_del(&gref->next_file); 357 + gref->users--; 358 + gref = n; 359 + } 360 + } else { 361 + rc = -EINVAL; 362 + } 363 + 364 + do_cleanup(); 365 + 366 + spin_unlock(&gref_lock); 367 + dealloc_grant_out: 368 + return rc; 369 + } 370 + 371 + static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, 372 + void __user *arg) 373 + { 374 + struct ioctl_gntalloc_unmap_notify op; 375 + struct gntalloc_gref *gref; 376 + uint64_t index; 377 + int pgoff; 378 + int rc; 379 + 380 + if (copy_from_user(&op, arg, sizeof(op))) 381 + return -EFAULT; 382 + 383 + index = op.index & ~(PAGE_SIZE - 1); 384 + pgoff = op.index & (PAGE_SIZE - 1); 385 + 386 + spin_lock(&gref_lock); 387 + 388 + gref = find_grefs(priv, index, 1); 389 + if (!gref) { 390 + rc = -ENOENT; 391 + goto unlock_out; 392 + } 393 + 394 + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { 395 + rc = -EINVAL; 396 + goto unlock_out; 397 + } 398 + 399 + gref->notify.flags = op.action; 400 + gref->notify.pgoff = pgoff; 401 + gref->notify.event = op.event_channel_port; 402 + rc = 0; 403 + unlock_out: 404 + spin_unlock(&gref_lock); 405 + return rc; 406 + } 407 + 408 + static long gntalloc_ioctl(struct file *filp, unsigned int cmd, 409 + unsigned long arg) 410 + { 411 + struct gntalloc_file_private_data *priv = filp->private_data; 412 + 413 + switch (cmd) { 414 + case IOCTL_GNTALLOC_ALLOC_GREF: 415 + return gntalloc_ioctl_alloc(priv, (void __user *)arg); 416 + 417 + case IOCTL_GNTALLOC_DEALLOC_GREF: 418 + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); 419 + 420 + case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: 421 + return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg); 422 + 423 + default: 424 + return -ENOIOCTLCMD; 425 + } 426 + 427 + return 0; 428 + } 429 + 430 + static void gntalloc_vma_close(struct vm_area_struct *vma) 431 + { 432 + struct gntalloc_gref *gref = vma->vm_private_data; 433 + if (!gref) 434 + return; 435 + 436 + spin_lock(&gref_lock); 437 + gref->users--; 438 + if (gref->users == 0) 439 + __del_gref(gref); 440 + spin_unlock(&gref_lock); 441 + } 442 + 443 + static struct vm_operations_struct gntalloc_vmops = { 444 + .close = gntalloc_vma_close, 445 + }; 446 + 447 + static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) 448 + { 449 + struct gntalloc_file_private_data *priv = filp->private_data; 450 + struct gntalloc_gref *gref; 451 + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 452 + int rv, i; 453 + 454 + pr_debug("%s: priv %p, page %lu+%d\n", __func__, 455 + priv, vma->vm_pgoff, count); 456 + 457 + if (!(vma->vm_flags & VM_SHARED)) { 458 + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); 459 + return -EINVAL; 460 + } 461 + 462 + spin_lock(&gref_lock); 463 + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); 464 + if (gref == NULL) { 465 + rv = -ENOENT; 466 + pr_debug("%s: Could not find grant reference", 467 + __func__); 468 + goto out_unlock; 469 + } 470 + 471 + vma->vm_private_data = gref; 472 + 473 + vma->vm_flags |= VM_RESERVED; 474 + vma->vm_flags |= VM_DONTCOPY; 475 + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; 476 + 477 + vma->vm_ops = &gntalloc_vmops; 478 + 479 + for (i = 0; i < count; i++) { 480 + gref->users++; 481 + rv = vm_insert_page(vma, vma->vm_start + i * 
PAGE_SIZE, 482 + gref->page); 483 + if (rv) 484 + goto out_unlock; 485 + 486 + gref = list_entry(gref->next_file.next, 487 + struct gntalloc_gref, next_file); 488 + } 489 + rv = 0; 490 + 491 + out_unlock: 492 + spin_unlock(&gref_lock); 493 + return rv; 494 + } 495 + 496 + static const struct file_operations gntalloc_fops = { 497 + .owner = THIS_MODULE, 498 + .open = gntalloc_open, 499 + .release = gntalloc_release, 500 + .unlocked_ioctl = gntalloc_ioctl, 501 + .mmap = gntalloc_mmap 502 + }; 503 + 504 + /* 505 + * ------------------------------------- 506 + * Module creation/destruction. 507 + * ------------------------------------- 508 + */ 509 + static struct miscdevice gntalloc_miscdev = { 510 + .minor = MISC_DYNAMIC_MINOR, 511 + .name = "xen/gntalloc", 512 + .fops = &gntalloc_fops, 513 + }; 514 + 515 + static int __init gntalloc_init(void) 516 + { 517 + int err; 518 + 519 + if (!xen_domain()) 520 + return -ENODEV; 521 + 522 + err = misc_register(&gntalloc_miscdev); 523 + if (err != 0) { 524 + printk(KERN_ERR "Could not register misc gntalloc device\n"); 525 + return err; 526 + } 527 + 528 + pr_debug("Created grant allocation device at %d,%d\n", 529 + MISC_MAJOR, gntalloc_miscdev.minor); 530 + 531 + return 0; 532 + } 533 + 534 + static void __exit gntalloc_exit(void) 535 + { 536 + misc_deregister(&gntalloc_miscdev); 537 + } 538 + 539 + module_init(gntalloc_init); 540 + module_exit(gntalloc_exit); 541 + 542 + MODULE_LICENSE("GPL"); 543 + MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " 544 + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); 545 + MODULE_DESCRIPTION("User-space grant reference allocator driver");
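As the header comment in gntalloc.c describes, the granting side allocates pages and grant references with an ioctl() and then mmap()s them. The following hedged userspace sketch relies only on the ioctl name, flag, and field names visible in the driver above; the include path and the /dev/xen/gntalloc node (derived from the miscdevice name) are assumptions about how the header and udev expose them.

#include <stdint.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntalloc.h>

/* Share one writable page with 'remote_domid'. Returns the gntalloc fd,
 * which must stay open for the lifetime of the sharing, or -1 on error. */
static int share_one_page(uint16_t remote_domid, void **map, uint32_t *gref)
{
        struct ioctl_gntalloc_alloc_gref op = {
                .domid = remote_domid,
                .flags = GNTALLOC_FLAG_WRITABLE,
                .count = 1,
        };
        int fd = open("/dev/xen/gntalloc", O_RDWR);

        if (fd < 0)
                return -1;

        /* Allocates the kernel page and the grant table entry. */
        if (ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op) < 0)
                return -1;

        /* op.index is the file offset that gntalloc_mmap() expects. */
        *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, op.index);
        if (*map == MAP_FAILED)
                return -1;

        *gref = op.gref_ids[0];      /* hand this to the mapping domain */
        return fd;
}

Teardown is the munmap() plus close() (or IOCTL_GNTALLOC_DEALLOC_GREF) sequence the file comment spells out.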
+236 -150
drivers/xen/gntdev.c
··· 32 32 #include <linux/sched.h> 33 33 #include <linux/spinlock.h> 34 34 #include <linux/slab.h> 35 + #include <linux/highmem.h> 35 36 36 37 #include <xen/xen.h> 37 38 #include <xen/grant_table.h> 39 + #include <xen/balloon.h> 38 40 #include <xen/gntdev.h> 41 + #include <xen/events.h> 39 42 #include <asm/xen/hypervisor.h> 40 43 #include <asm/xen/hypercall.h> 41 44 #include <asm/xen/page.h> ··· 48 45 "Gerd Hoffmann <kraxel@redhat.com>"); 49 46 MODULE_DESCRIPTION("User-space granted page access driver"); 50 47 51 - static int limit = 1024; 48 + static int limit = 1024*1024; 52 49 module_param(limit, int, 0644); 53 - MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at " 54 - "once by a gntdev instance"); 50 + MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " 51 + "the gntdev device"); 52 + 53 + static atomic_t pages_mapped = ATOMIC_INIT(0); 54 + 55 + static int use_ptemod; 55 56 56 57 struct gntdev_priv { 57 58 struct list_head maps; 58 - uint32_t used; 59 - uint32_t limit; 60 59 /* lock protects maps from concurrent changes */ 61 60 spinlock_t lock; 62 61 struct mm_struct *mm; 63 62 struct mmu_notifier mn; 64 63 }; 65 64 65 + struct unmap_notify { 66 + int flags; 67 + /* Address relative to the start of the grant_map */ 68 + int addr; 69 + int event; 70 + }; 71 + 66 72 struct grant_map { 67 73 struct list_head next; 68 - struct gntdev_priv *priv; 69 74 struct vm_area_struct *vma; 70 75 int index; 71 76 int count; 72 77 int flags; 73 - int is_mapped; 78 + atomic_t users; 79 + struct unmap_notify notify; 74 80 struct ioctl_gntdev_grant_ref *grants; 75 81 struct gnttab_map_grant_ref *map_ops; 76 82 struct gnttab_unmap_grant_ref *unmap_ops; 77 83 struct page **pages; 78 84 }; 85 + 86 + static int unmap_grant_pages(struct grant_map *map, int offset, int pages); 79 87 80 88 /* ------------------------------------------------------------------ */ 81 89 ··· 96 82 #ifdef DEBUG 97 83 struct grant_map *map; 98 84 99 - pr_debug("maps list (priv %p, usage %d/%d)\n", 100 - priv, priv->used, priv->limit); 101 - 85 + pr_debug("%s: maps list (priv %p)\n", __func__, priv); 102 86 list_for_each_entry(map, &priv->maps, next) 103 87 pr_debug(" index %2d, count %2d %s\n", 104 88 map->index, map->count, ··· 123 111 NULL == add->pages) 124 112 goto err; 125 113 114 + if (alloc_xenballooned_pages(count, add->pages)) 115 + goto err; 116 + 126 117 for (i = 0; i < count; i++) { 127 - add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 128 - if (add->pages[i] == NULL) 129 - goto err; 118 + add->map_ops[i].handle = -1; 119 + add->unmap_ops[i].handle = -1; 130 120 } 131 121 132 122 add->index = 0; 133 123 add->count = count; 134 - add->priv = priv; 135 - 136 - if (add->count + priv->used > priv->limit) 137 - goto err; 124 + atomic_set(&add->users, 1); 138 125 139 126 return add; 140 127 141 128 err: 142 - if (add->pages) 143 - for (i = 0; i < count; i++) { 144 - if (add->pages[i]) 145 - __free_page(add->pages[i]); 146 - } 147 129 kfree(add->pages); 148 130 kfree(add->grants); 149 131 kfree(add->map_ops); ··· 160 154 list_add_tail(&add->next, &priv->maps); 161 155 162 156 done: 163 - priv->used += add->count; 164 157 gntdev_print_maps(priv, "[new]", add->index); 165 158 } 166 159 ··· 171 166 list_for_each_entry(map, &priv->maps, next) { 172 167 if (map->index != index) 173 168 continue; 174 - if (map->count != count) 169 + if (count && map->count != count) 175 170 continue; 176 171 return map; 177 172 } 178 173 return NULL; 179 174 } 180 175 181 - static struct grant_map 
*gntdev_find_map_vaddr(struct gntdev_priv *priv, 182 - unsigned long vaddr) 176 + static void gntdev_put_map(struct grant_map *map) 183 177 { 184 - struct grant_map *map; 185 - 186 - list_for_each_entry(map, &priv->maps, next) { 187 - if (!map->vma) 188 - continue; 189 - if (vaddr < map->vma->vm_start) 190 - continue; 191 - if (vaddr >= map->vma->vm_end) 192 - continue; 193 - return map; 194 - } 195 - return NULL; 196 - } 197 - 198 - static int gntdev_del_map(struct grant_map *map) 199 - { 200 - int i; 201 - 202 - if (map->vma) 203 - return -EBUSY; 204 - for (i = 0; i < map->count; i++) 205 - if (map->unmap_ops[i].handle) 206 - return -EBUSY; 207 - 208 - map->priv->used -= map->count; 209 - list_del(&map->next); 210 - return 0; 211 - } 212 - 213 - static void gntdev_free_map(struct grant_map *map) 214 - { 215 - int i; 216 - 217 178 if (!map) 218 179 return; 219 180 220 - if (map->pages) 221 - for (i = 0; i < map->count; i++) { 222 - if (map->pages[i]) 223 - __free_page(map->pages[i]); 224 - } 181 + if (!atomic_dec_and_test(&map->users)) 182 + return; 183 + 184 + atomic_sub(map->count, &pages_mapped); 185 + 186 + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 187 + notify_remote_via_evtchn(map->notify.event); 188 + } 189 + 190 + if (map->pages) { 191 + if (!use_ptemod) 192 + unmap_grant_pages(map, 0, map->count); 193 + 194 + free_xenballooned_pages(map->count, map->pages); 195 + } 225 196 kfree(map->pages); 226 197 kfree(map->grants); 227 198 kfree(map->map_ops); ··· 212 231 { 213 232 struct grant_map *map = data; 214 233 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; 234 + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; 215 235 u64 pte_maddr; 216 236 217 237 BUG_ON(pgnr >= map->count); 218 238 pte_maddr = arbitrary_virt_to_machine(pte).maddr; 219 239 220 - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, 221 - GNTMAP_contains_pte | map->flags, 240 + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, 222 241 map->grants[pgnr].ref, 223 242 map->grants[pgnr].domid); 224 - gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, 225 - GNTMAP_contains_pte | map->flags, 226 - 0 /* handle */); 243 + gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, 244 + -1 /* handle */); 227 245 return 0; 228 246 } 229 247 230 248 static int map_grant_pages(struct grant_map *map) 231 249 { 232 250 int i, err = 0; 251 + 252 + if (!use_ptemod) { 253 + /* Note: it could already be mapped */ 254 + if (map->map_ops[0].handle != -1) 255 + return 0; 256 + for (i = 0; i < map->count; i++) { 257 + unsigned long addr = (unsigned long) 258 + pfn_to_kaddr(page_to_pfn(map->pages[i])); 259 + gnttab_set_map_op(&map->map_ops[i], addr, map->flags, 260 + map->grants[i].ref, 261 + map->grants[i].domid); 262 + gnttab_set_unmap_op(&map->unmap_ops[i], addr, 263 + map->flags, -1 /* handle */); 264 + } 265 + } 233 266 234 267 pr_debug("map %d+%d\n", map->index, map->count); 235 268 err = gnttab_map_refs(map->map_ops, map->pages, map->count); ··· 253 258 for (i = 0; i < map->count; i++) { 254 259 if (map->map_ops[i].status) 255 260 err = -EINVAL; 256 - map->unmap_ops[i].handle = map->map_ops[i].handle; 261 + else { 262 + BUG_ON(map->map_ops[i].handle == -1); 263 + map->unmap_ops[i].handle = map->map_ops[i].handle; 264 + pr_debug("map handle=%d\n", map->map_ops[i].handle); 265 + } 257 266 } 258 267 return err; 259 268 } 260 269 261 - static int unmap_grant_pages(struct grant_map *map, int offset, int pages) 270 + static int __unmap_grant_pages(struct grant_map *map, int offset, 
int pages) 262 271 { 263 272 int i, err = 0; 264 273 265 - pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 266 - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); 274 + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 275 + int pgno = (map->notify.addr >> PAGE_SHIFT); 276 + if (pgno >= offset && pgno < offset + pages && use_ptemod) { 277 + void __user *tmp = (void __user *) 278 + map->vma->vm_start + map->notify.addr; 279 + err = copy_to_user(tmp, &err, 1); 280 + if (err) 281 + return err; 282 + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 283 + } else if (pgno >= offset && pgno < offset + pages) { 284 + uint8_t *tmp = kmap(map->pages[pgno]); 285 + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; 286 + kunmap(map->pages[pgno]); 287 + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 288 + } 289 + } 290 + 291 + err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); 267 292 if (err) 268 293 return err; 269 294 270 295 for (i = 0; i < pages; i++) { 271 296 if (map->unmap_ops[offset+i].status) 272 297 err = -EINVAL; 273 - map->unmap_ops[offset+i].handle = 0; 298 + pr_debug("unmap handle=%d st=%d\n", 299 + map->unmap_ops[offset+i].handle, 300 + map->unmap_ops[offset+i].status); 301 + map->unmap_ops[offset+i].handle = -1; 274 302 } 303 + return err; 304 + } 305 + 306 + static int unmap_grant_pages(struct grant_map *map, int offset, int pages) 307 + { 308 + int range, err = 0; 309 + 310 + pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 311 + 312 + /* It is possible the requested range will have a "hole" where we 313 + * already unmapped some of the grants. Only unmap valid ranges. 314 + */ 315 + while (pages && !err) { 316 + while (pages && map->unmap_ops[offset].handle == -1) { 317 + offset++; 318 + pages--; 319 + } 320 + range = 0; 321 + while (range < pages) { 322 + if (map->unmap_ops[offset+range].handle == -1) { 323 + range--; 324 + break; 325 + } 326 + range++; 327 + } 328 + err = __unmap_grant_pages(map, offset, range); 329 + offset += range; 330 + pages -= range; 331 + } 332 + 275 333 return err; 276 334 } 277 335 ··· 335 287 struct grant_map *map = vma->vm_private_data; 336 288 337 289 pr_debug("close %p\n", vma); 338 - map->is_mapped = 0; 339 290 map->vma = NULL; 340 291 vma->vm_private_data = NULL; 341 - } 342 - 343 - static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 344 - { 345 - pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n", 346 - vmf->virtual_address, vmf->pgoff); 347 - vmf->flags = VM_FAULT_ERROR; 348 - return 0; 292 + gntdev_put_map(map); 349 293 } 350 294 351 295 static struct vm_operations_struct gntdev_vmops = { 352 296 .close = gntdev_vma_close, 353 - .fault = gntdev_vma_fault, 354 297 }; 355 298 356 299 /* ------------------------------------------------------------------ */ ··· 358 319 spin_lock(&priv->lock); 359 320 list_for_each_entry(map, &priv->maps, next) { 360 321 if (!map->vma) 361 - continue; 362 - if (!map->is_mapped) 363 322 continue; 364 323 if (map->vma->vm_start >= end) 365 324 continue; ··· 423 386 424 387 INIT_LIST_HEAD(&priv->maps); 425 388 spin_lock_init(&priv->lock); 426 - priv->limit = limit; 427 389 428 - priv->mm = get_task_mm(current); 429 - if (!priv->mm) { 430 - kfree(priv); 431 - return -ENOMEM; 390 + if (use_ptemod) { 391 + priv->mm = get_task_mm(current); 392 + if (!priv->mm) { 393 + kfree(priv); 394 + return -ENOMEM; 395 + } 396 + priv->mn.ops = &gntdev_mmu_ops; 397 + ret = mmu_notifier_register(&priv->mn, priv->mm); 398 + 
mmput(priv->mm); 432 399 } 433 - priv->mn.ops = &gntdev_mmu_ops; 434 - ret = mmu_notifier_register(&priv->mn, priv->mm); 435 - mmput(priv->mm); 436 400 437 401 if (ret) { 438 402 kfree(priv); ··· 450 412 { 451 413 struct gntdev_priv *priv = flip->private_data; 452 414 struct grant_map *map; 453 - int err; 454 415 455 416 pr_debug("priv %p\n", priv); 456 417 457 418 spin_lock(&priv->lock); 458 419 while (!list_empty(&priv->maps)) { 459 420 map = list_entry(priv->maps.next, struct grant_map, next); 460 - err = gntdev_del_map(map); 461 - if (WARN_ON(err)) 462 - gntdev_free_map(map); 463 - 421 + list_del(&map->next); 422 + gntdev_put_map(map); 464 423 } 465 424 spin_unlock(&priv->lock); 466 425 467 - mmu_notifier_unregister(&priv->mn, priv->mm); 426 + if (use_ptemod) 427 + mmu_notifier_unregister(&priv->mn, priv->mm); 468 428 kfree(priv); 469 429 return 0; 470 430 } ··· 479 443 pr_debug("priv %p, add %d\n", priv, op.count); 480 444 if (unlikely(op.count <= 0)) 481 445 return -EINVAL; 482 - if (unlikely(op.count > priv->limit)) 483 - return -EINVAL; 484 446 485 447 err = -ENOMEM; 486 448 map = gntdev_alloc_map(priv, op.count); 487 449 if (!map) 488 450 return err; 451 + 452 + if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { 453 + pr_debug("can't map: over limit\n"); 454 + gntdev_put_map(map); 455 + return err; 456 + } 457 + 489 458 if (copy_from_user(map->grants, &u->refs, 490 459 sizeof(map->grants[0]) * op.count) != 0) { 491 - gntdev_free_map(map); 460 + gntdev_put_map(map); 492 461 return err; 493 462 } 494 463 ··· 502 461 op.index = map->index << PAGE_SHIFT; 503 462 spin_unlock(&priv->lock); 504 463 505 - if (copy_to_user(u, &op, sizeof(op)) != 0) { 506 - spin_lock(&priv->lock); 507 - gntdev_del_map(map); 508 - spin_unlock(&priv->lock); 509 - gntdev_free_map(map); 510 - return err; 511 - } 464 + if (copy_to_user(u, &op, sizeof(op)) != 0) 465 + return -EFAULT; 466 + 512 467 return 0; 513 468 } 514 469 ··· 521 484 522 485 spin_lock(&priv->lock); 523 486 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 524 - if (map) 525 - err = gntdev_del_map(map); 487 + if (map) { 488 + list_del(&map->next); 489 + gntdev_put_map(map); 490 + err = 0; 491 + } 526 492 spin_unlock(&priv->lock); 527 - if (!err) 528 - gntdev_free_map(map); 529 493 return err; 530 494 } 531 495 ··· 534 496 struct ioctl_gntdev_get_offset_for_vaddr __user *u) 535 497 { 536 498 struct ioctl_gntdev_get_offset_for_vaddr op; 499 + struct vm_area_struct *vma; 537 500 struct grant_map *map; 538 501 539 502 if (copy_from_user(&op, u, sizeof(op)) != 0) 540 503 return -EFAULT; 541 504 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); 542 505 543 - spin_lock(&priv->lock); 544 - map = gntdev_find_map_vaddr(priv, op.vaddr); 545 - if (map == NULL || 546 - map->vma->vm_start != op.vaddr) { 547 - spin_unlock(&priv->lock); 506 + vma = find_vma(current->mm, op.vaddr); 507 + if (!vma || vma->vm_ops != &gntdev_vmops) 548 508 return -EINVAL; 549 - } 509 + 510 + map = vma->vm_private_data; 511 + if (!map) 512 + return -EINVAL; 513 + 550 514 op.offset = map->index << PAGE_SHIFT; 551 515 op.count = map->count; 552 - spin_unlock(&priv->lock); 553 516 554 517 if (copy_to_user(u, &op, sizeof(op)) != 0) 555 518 return -EFAULT; 556 519 return 0; 557 520 } 558 521 559 - static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv, 560 - struct ioctl_gntdev_set_max_grants __user *u) 522 + static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) 561 523 { 562 - struct 
ioctl_gntdev_set_max_grants op; 524 + struct ioctl_gntdev_unmap_notify op; 525 + struct grant_map *map; 526 + int rc; 563 527 564 - if (copy_from_user(&op, u, sizeof(op)) != 0) 528 + if (copy_from_user(&op, u, sizeof(op))) 565 529 return -EFAULT; 566 - pr_debug("priv %p, limit %d\n", priv, op.count); 567 - if (op.count > limit) 568 - return -E2BIG; 530 + 531 + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) 532 + return -EINVAL; 569 533 570 534 spin_lock(&priv->lock); 571 - priv->limit = op.count; 535 + 536 + list_for_each_entry(map, &priv->maps, next) { 537 + uint64_t begin = map->index << PAGE_SHIFT; 538 + uint64_t end = (map->index + map->count) << PAGE_SHIFT; 539 + if (op.index >= begin && op.index < end) 540 + goto found; 541 + } 542 + rc = -ENOENT; 543 + goto unlock_out; 544 + 545 + found: 546 + if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && 547 + (map->flags & GNTMAP_readonly)) { 548 + rc = -EINVAL; 549 + goto unlock_out; 550 + } 551 + 552 + map->notify.flags = op.action; 553 + map->notify.addr = op.index - (map->index << PAGE_SHIFT); 554 + map->notify.event = op.event_channel_port; 555 + rc = 0; 556 + unlock_out: 572 557 spin_unlock(&priv->lock); 573 - return 0; 558 + return rc; 574 559 } 575 560 576 561 static long gntdev_ioctl(struct file *flip, ··· 612 551 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: 613 552 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); 614 553 615 - case IOCTL_GNTDEV_SET_MAX_GRANTS: 616 - return gntdev_ioctl_set_max_grants(priv, ptr); 554 + case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: 555 + return gntdev_ioctl_notify(priv, ptr); 617 556 618 557 default: 619 558 pr_debug("priv %p, unknown cmd %x\n", priv, cmd); ··· 629 568 int index = vma->vm_pgoff; 630 569 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 631 570 struct grant_map *map; 632 - int err = -EINVAL; 571 + int i, err = -EINVAL; 633 572 634 573 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) 635 574 return -EINVAL; ··· 641 580 map = gntdev_find_map_index(priv, index, count); 642 581 if (!map) 643 582 goto unlock_out; 644 - if (map->vma) 583 + if (use_ptemod && map->vma) 645 584 goto unlock_out; 646 - if (priv->mm != vma->vm_mm) { 585 + if (use_ptemod && priv->mm != vma->vm_mm) { 647 586 printk(KERN_WARNING "Huh? 
Other mm?\n"); 648 587 goto unlock_out; 649 588 } 589 + 590 + atomic_inc(&map->users); 650 591 651 592 vma->vm_ops = &gntdev_vmops; 652 593 653 594 vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; 654 595 655 596 vma->vm_private_data = map; 656 - map->vma = vma; 657 597 658 - map->flags = GNTMAP_host_map | GNTMAP_application_map; 659 - if (!(vma->vm_flags & VM_WRITE)) 660 - map->flags |= GNTMAP_readonly; 598 + if (use_ptemod) 599 + map->vma = vma; 600 + 601 + if (map->flags) { 602 + if ((vma->vm_flags & VM_WRITE) && 603 + (map->flags & GNTMAP_readonly)) 604 + return -EINVAL; 605 + } else { 606 + map->flags = GNTMAP_host_map; 607 + if (!(vma->vm_flags & VM_WRITE)) 608 + map->flags |= GNTMAP_readonly; 609 + } 661 610 662 611 spin_unlock(&priv->lock); 663 612 664 - err = apply_to_page_range(vma->vm_mm, vma->vm_start, 665 - vma->vm_end - vma->vm_start, 666 - find_grant_ptes, map); 667 - if (err) { 668 - printk(KERN_WARNING "find_grant_ptes() failure.\n"); 669 - return err; 613 + if (use_ptemod) { 614 + err = apply_to_page_range(vma->vm_mm, vma->vm_start, 615 + vma->vm_end - vma->vm_start, 616 + find_grant_ptes, map); 617 + if (err) { 618 + printk(KERN_WARNING "find_grant_ptes() failure.\n"); 619 + goto out_put_map; 620 + } 670 621 } 671 622 672 623 err = map_grant_pages(map); 673 - if (err) { 674 - printk(KERN_WARNING "map_grant_pages() failure.\n"); 675 - return err; 676 - } 624 + if (err) 625 + goto out_put_map; 677 626 678 - map->is_mapped = 1; 627 + if (!use_ptemod) { 628 + for (i = 0; i < count; i++) { 629 + err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, 630 + map->pages[i]); 631 + if (err) 632 + goto out_put_map; 633 + } 634 + } 679 635 680 636 return 0; 681 637 682 638 unlock_out: 683 639 spin_unlock(&priv->lock); 640 + return err; 641 + 642 + out_put_map: 643 + if (use_ptemod) 644 + map->vma = NULL; 645 + gntdev_put_map(map); 684 646 return err; 685 647 } 686 648 ··· 729 645 730 646 if (!xen_domain()) 731 647 return -ENODEV; 648 + 649 + use_ptemod = xen_pv_domain(); 732 650 733 651 err = misc_register(&gntdev_miscdev); 734 652 if (err != 0) {
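The new unmap-notify ioctl lets userspace ask for a byte to be cleared and/or an event channel to be signalled when the last mapping of a grant goes away, so a peer can detect an unclean exit. A hedged sketch follows, assuming the struct and flag definitions from the gntdev uapi header (only the ioctl name and field names visible in the driver above are relied on) and assuming the range was mapped writable, since the series rejects UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings.

#include <stdint.h>
#include <sys/ioctl.h>
#include <xen/gntdev.h>

/* 'fd' is an open /dev/xen/gntdev; 'index' is the offset previously returned
 * by IOCTL_GNTDEV_MAP_GRANT_REF and selects the byte to clear on unmap. */
static int arm_unmap_notify(int fd, uint64_t index, unsigned int evtchn_port)
{
        struct ioctl_gntdev_unmap_notify notify = {
                .index = index,
                .action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
                .event_channel_port = evtchn_port,
        };

        return ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
}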
+10
drivers/xen/grant-table.c
···
         if (ret)
                 return ret;
 
+        if (xen_feature(XENFEAT_auto_translated_physmap))
+                return ret;
+
         for (i = 0; i < count; i++) {
+                /* Do not add to override if the map failed. */
+                if (map_ops[i].status)
+                        continue;
+
                 /* m2p override only supported for GNTMAP_contains_pte mappings */
                 if (!(map_ops[i].flags & GNTMAP_contains_pte))
                         continue;
···
 
         ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
         if (ret)
+                return ret;
+
+        if (xen_feature(XENFEAT_auto_translated_physmap))
                 return ret;
 
         for (i = 0; i < count; i++) {
+256
drivers/xen/xen-balloon.c
··· 1 + /****************************************************************************** 2 + * Xen balloon driver - enables returning/claiming memory to/from Xen. 3 + * 4 + * Copyright (c) 2003, B Dragovic 5 + * Copyright (c) 2003-2004, M Williamson, K Fraser 6 + * Copyright (c) 2005 Dan M. Smith, IBM Corporation 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License version 2 10 + * as published by the Free Software Foundation; or, when distributed 11 + * separately from the Linux kernel or incorporated into other 12 + * software packages, subject to the following license: 13 + * 14 + * Permission is hereby granted, free of charge, to any person obtaining a copy 15 + * of this source file (the "Software"), to deal in the Software without 16 + * restriction, including without limitation the rights to use, copy, modify, 17 + * merge, publish, distribute, sublicense, and/or sell copies of the Software, 18 + * and to permit persons to whom the Software is furnished to do so, subject to 19 + * the following conditions: 20 + * 21 + * The above copyright notice and this permission notice shall be included in 22 + * all copies or substantial portions of the Software. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 29 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 30 + * IN THE SOFTWARE. 31 + */ 32 + 33 + #include <linux/kernel.h> 34 + #include <linux/module.h> 35 + #include <linux/sysdev.h> 36 + #include <linux/capability.h> 37 + 38 + #include <xen/xen.h> 39 + #include <xen/interface/xen.h> 40 + #include <xen/balloon.h> 41 + #include <xen/xenbus.h> 42 + #include <xen/features.h> 43 + #include <xen/page.h> 44 + 45 + #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 46 + 47 + #define BALLOON_CLASS_NAME "xen_memory" 48 + 49 + static struct sys_device balloon_sysdev; 50 + 51 + static int register_balloon(struct sys_device *sysdev); 52 + 53 + static struct xenbus_watch target_watch = 54 + { 55 + .node = "memory/target" 56 + }; 57 + 58 + /* React to a change in the target key */ 59 + static void watch_target(struct xenbus_watch *watch, 60 + const char **vec, unsigned int len) 61 + { 62 + unsigned long long new_target; 63 + int err; 64 + 65 + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); 66 + if (err != 1) { 67 + /* This is ok (for domain0 at least) - so just return */ 68 + return; 69 + } 70 + 71 + /* The given memory/target value is in KiB, so it needs converting to 72 + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
73 + */ 74 + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 75 + } 76 + 77 + static int balloon_init_watcher(struct notifier_block *notifier, 78 + unsigned long event, 79 + void *data) 80 + { 81 + int err; 82 + 83 + err = register_xenbus_watch(&target_watch); 84 + if (err) 85 + printk(KERN_ERR "Failed to set balloon watcher\n"); 86 + 87 + return NOTIFY_DONE; 88 + } 89 + 90 + static struct notifier_block xenstore_notifier; 91 + 92 + static int __init balloon_init(void) 93 + { 94 + if (!xen_domain()) 95 + return -ENODEV; 96 + 97 + pr_info("xen-balloon: Initialising balloon driver.\n"); 98 + 99 + register_balloon(&balloon_sysdev); 100 + 101 + target_watch.callback = watch_target; 102 + xenstore_notifier.notifier_call = balloon_init_watcher; 103 + 104 + register_xenstore_notifier(&xenstore_notifier); 105 + 106 + return 0; 107 + } 108 + subsys_initcall(balloon_init); 109 + 110 + static void balloon_exit(void) 111 + { 112 + /* XXX - release balloon here */ 113 + return; 114 + } 115 + 116 + module_exit(balloon_exit); 117 + 118 + #define BALLOON_SHOW(name, format, args...) \ 119 + static ssize_t show_##name(struct sys_device *dev, \ 120 + struct sysdev_attribute *attr, \ 121 + char *buf) \ 122 + { \ 123 + return sprintf(buf, format, ##args); \ 124 + } \ 125 + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 126 + 127 + BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 128 + BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); 129 + BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); 130 + 131 + static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); 132 + static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); 133 + static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); 134 + static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); 135 + 136 + static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, 137 + char *buf) 138 + { 139 + return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 140 + } 141 + 142 + static ssize_t store_target_kb(struct sys_device *dev, 143 + struct sysdev_attribute *attr, 144 + const char *buf, 145 + size_t count) 146 + { 147 + char *endchar; 148 + unsigned long long target_bytes; 149 + 150 + if (!capable(CAP_SYS_ADMIN)) 151 + return -EPERM; 152 + 153 + target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; 154 + 155 + balloon_set_new_target(target_bytes >> PAGE_SHIFT); 156 + 157 + return count; 158 + } 159 + 160 + static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, 161 + show_target_kb, store_target_kb); 162 + 163 + 164 + static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, 165 + char *buf) 166 + { 167 + return sprintf(buf, "%llu\n", 168 + (unsigned long long)balloon_stats.target_pages 169 + << PAGE_SHIFT); 170 + } 171 + 172 + static ssize_t store_target(struct sys_device *dev, 173 + struct sysdev_attribute *attr, 174 + const char *buf, 175 + size_t count) 176 + { 177 + char *endchar; 178 + unsigned long long target_bytes; 179 + 180 + if (!capable(CAP_SYS_ADMIN)) 181 + return -EPERM; 182 + 183 + target_bytes = memparse(buf, &endchar); 184 + 185 + balloon_set_new_target(target_bytes >> PAGE_SHIFT); 186 + 187 + return count; 188 + } 189 + 190 + static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, 191 + show_target, store_target); 192 + 193 + 194 + static struct sysdev_attribute *balloon_attrs[] = { 195 + &attr_target_kb, 196 + &attr_target, 197 + 
&attr_schedule_delay.attr, 198 + &attr_max_schedule_delay.attr, 199 + &attr_retry_count.attr, 200 + &attr_max_retry_count.attr 201 + }; 202 + 203 + static struct attribute *balloon_info_attrs[] = { 204 + &attr_current_kb.attr, 205 + &attr_low_kb.attr, 206 + &attr_high_kb.attr, 207 + NULL 208 + }; 209 + 210 + static struct attribute_group balloon_info_group = { 211 + .name = "info", 212 + .attrs = balloon_info_attrs 213 + }; 214 + 215 + static struct sysdev_class balloon_sysdev_class = { 216 + .name = BALLOON_CLASS_NAME 217 + }; 218 + 219 + static int register_balloon(struct sys_device *sysdev) 220 + { 221 + int i, error; 222 + 223 + error = sysdev_class_register(&balloon_sysdev_class); 224 + if (error) 225 + return error; 226 + 227 + sysdev->id = 0; 228 + sysdev->cls = &balloon_sysdev_class; 229 + 230 + error = sysdev_register(sysdev); 231 + if (error) { 232 + sysdev_class_unregister(&balloon_sysdev_class); 233 + return error; 234 + } 235 + 236 + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { 237 + error = sysdev_create_file(sysdev, balloon_attrs[i]); 238 + if (error) 239 + goto fail; 240 + } 241 + 242 + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); 243 + if (error) 244 + goto fail; 245 + 246 + return 0; 247 + 248 + fail: 249 + while (--i >= 0) 250 + sysdev_remove_file(sysdev, balloon_attrs[i]); 251 + sysdev_unregister(sysdev); 252 + sysdev_class_unregister(&balloon_sysdev_class); 253 + return error; 254 + } 255 + 256 + MODULE_LICENSE("GPL");
+25
include/xen/balloon.h
··· 1 + /****************************************************************************** 2 + * Xen balloon functionality 3 + */ 4 + 5 + #define RETRY_UNLIMITED 0 6 + 7 + struct balloon_stats { 8 + /* We aim for 'current allocation' == 'target allocation'. */ 9 + unsigned long current_pages; 10 + unsigned long target_pages; 11 + /* Number of pages in high- and low-memory balloons. */ 12 + unsigned long balloon_low; 13 + unsigned long balloon_high; 14 + unsigned long schedule_delay; 15 + unsigned long max_schedule_delay; 16 + unsigned long retry_count; 17 + unsigned long max_retry_count; 18 + }; 19 + 20 + extern struct balloon_stats balloon_stats; 21 + 22 + void balloon_set_new_target(unsigned long target); 23 + 24 + int alloc_xenballooned_pages(int nr_pages, struct page** pages); 25 + void free_xenballooned_pages(int nr_pages, struct page** pages);
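The two functions declared at the end of this header are the new in-kernel interface for borrowing pages that the balloon driver has already handed back to the hypervisor, and for returning them afterwards. A minimal sketch of a kernel-side caller follows; the xen_example_consumer name and the fixed count of four pages are illustrative only, not part of the patch.

#include <linux/kernel.h>
#include <linux/mm.h>
#include <xen/balloon.h>

/* Illustrative only: borrow a few ballooned pages, then hand them back. */
static int xen_example_consumer(void)
{
	struct page *pages[4];
	int rc;

	rc = alloc_xenballooned_pages(ARRAY_SIZE(pages), pages);
	if (rc < 0)
		return rc;	/* the balloon could not supply enough pages */

	/* ... use the pages, e.g. as targets for grant mappings ... */

	free_xenballooned_pages(ARRAY_SIZE(pages), pages);
	return 0;
}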
+82
include/xen/gntalloc.h
··· 1 + /****************************************************************************** 2 + * gntalloc.h 3 + * 4 + * Interface to /dev/xen/gntalloc. 5 + * 6 + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov> 7 + * 8 + * This file is in the public domain. 9 + */ 10 + 11 + #ifndef __LINUX_PUBLIC_GNTALLOC_H__ 12 + #define __LINUX_PUBLIC_GNTALLOC_H__ 13 + 14 + /* 15 + * Allocates a new page and creates a new grant reference. 16 + */ 17 + #define IOCTL_GNTALLOC_ALLOC_GREF \ 18 + _IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref)) 19 + struct ioctl_gntalloc_alloc_gref { 20 + /* IN parameters */ 21 + /* The ID of the domain to be given access to the grants. */ 22 + uint16_t domid; 23 + /* Flags for this mapping */ 24 + uint16_t flags; 25 + /* Number of pages to map */ 26 + uint32_t count; 27 + /* OUT parameters */ 28 + /* The offset to be used on a subsequent call to mmap(). */ 29 + uint64_t index; 30 + /* The grant references of the newly created grant, one per page */ 31 + /* Variable size, depending on count */ 32 + uint32_t gref_ids[1]; 33 + }; 34 + 35 + #define GNTALLOC_FLAG_WRITABLE 1 36 + 37 + /* 38 + * Deallocates the grant reference, allowing the associated page to be freed if 39 + * no other domains are using it. 40 + */ 41 + #define IOCTL_GNTALLOC_DEALLOC_GREF \ 42 + _IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref)) 43 + struct ioctl_gntalloc_dealloc_gref { 44 + /* IN parameters */ 45 + /* The offset returned in the map operation */ 46 + uint64_t index; 47 + /* Number of references to unmap */ 48 + uint32_t count; 49 + }; 50 + 51 + /* 52 + * Sets up an unmap notification within the page, so that the other side can do 53 + * cleanup if this side crashes. Required to implement cross-domain robust 54 + * mutexes or close notification on communication channels. 55 + * 56 + * Each mapped page only supports one notification; multiple calls referring to 57 + * the same page overwrite the previous notification. You must clear the 58 + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it 59 + * to occur. 60 + */ 61 + #define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ 62 + _IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) 63 + struct ioctl_gntalloc_unmap_notify { 64 + /* IN parameters */ 65 + /* Offset in the file descriptor for a byte within the page (same as 66 + * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to 67 + * be cleared. Otherwise, it can be any byte in the page whose 68 + * notification we are adjusting. 69 + */ 70 + uint64_t index; 71 + /* Action(s) to take on unmap */ 72 + uint32_t action; 73 + /* Event channel to notify */ 74 + uint32_t event_channel_port; 75 + }; 76 + 77 + /* Clear (set to zero) the byte specified by index */ 78 + #define UNMAP_NOTIFY_CLEAR_BYTE 0x1 79 + /* Send an interrupt on the indicated event channel */ 80 + #define UNMAP_NOTIFY_SEND_EVENT 0x2 81 + 82 + #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
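Taken together, the ioctls above give user space a simple sequence for sharing a page: allocate a grant reference, mmap() the returned index, and hand the grant reference to the peer domain out of band. A rough sketch of that flow follows; the /dev/xen/gntalloc path matches the driver, while the peer domain ID of 1 is an example value and error handling is abbreviated.

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xen/gntalloc.h>

int main(void)
{
	struct ioctl_gntalloc_alloc_gref op;
	void *page;
	int fd;

	fd = open("/dev/xen/gntalloc", O_RDWR);
	if (fd < 0)
		return 1;

	memset(&op, 0, sizeof(op));
	op.domid = 1;			/* peer domain (example value) */
	op.flags = GNTALLOC_FLAG_WRITABLE;
	op.count = 1;			/* one page, so gref_ids[1] suffices */
	if (ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op) < 0)
		return 1;

	/* op.gref_ids[0] is passed to the peer out of band; op.index is the
	 * mmap offset for the same page in this process. */
	page = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, op.index);
	if (page == MAP_FAILED)
		return 1;

	/* ... communicate through the shared page ... */

	munmap(page, getpagesize());
	close(fd);
	return 0;
}

Once the peer has unmapped its side, the reference would be released with IOCTL_GNTALLOC_DEALLOC_GREF (not shown), optionally after arming an unmap notification via IOCTL_GNTALLOC_SET_UNMAP_NOTIFY.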
+31
include/xen/gntdev.h
··· 116 116 uint32_t count; 117 117 }; 118 118 119 + /* 120 + * Sets up an unmap notification within the page, so that the other side can do 121 + * cleanup if this side crashes. Required to implement cross-domain robust 122 + * mutexes or close notification on communication channels. 123 + * 124 + * Each mapped page only supports one notification; multiple calls referring to 125 + * the same page overwrite the previous notification. You must clear the 126 + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it 127 + * to occur. 128 + */ 129 + #define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ 130 + _IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify)) 131 + struct ioctl_gntdev_unmap_notify { 132 + /* IN parameters */ 133 + /* Offset in the file descriptor for a byte within the page (same as 134 + * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to 135 + * be cleared. Otherwise, it can be any byte in the page whose 136 + * notification we are adjusting. 137 + */ 138 + uint64_t index; 139 + /* Action(s) to take on unmap */ 140 + uint32_t action; 141 + /* Event channel to notify */ 142 + uint32_t event_channel_port; 143 + }; 144 + 145 + /* Clear (set to zero) the byte specified by index */ 146 + #define UNMAP_NOTIFY_CLEAR_BYTE 0x1 147 + /* Send an interrupt on the indicated event channel */ 148 + #define UNMAP_NOTIFY_SEND_EVENT 0x2 149 + 119 150 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
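The gntdev counterpart of the gntalloc notification lets the mapping side request that, when its mapping disappears, a byte in the page is cleared and/or an event channel is signalled, so the granting side can detect a crashed peer. The sketch below arms both actions on an existing mapping; fd (an open /dev/xen/gntdev descriptor), index (the offset previously returned when mapping the grant), and evtchn_port (an already-bound event channel) are the caller's values, not part of this patch.

#include <stdint.h>
#include <sys/ioctl.h>
#include <xen/gntdev.h>

/* Illustrative only: request clear-byte plus event notification on unmap. */
static int arm_unmap_notify(int fd, uint64_t index, uint32_t evtchn_port)
{
	struct ioctl_gntdev_unmap_notify notify;

	notify.index = index;		/* e.g. byte 0 of the first mapped page */
	notify.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT;
	notify.event_channel_port = evtchn_port;

	return ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
}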