// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *	Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *	Copyright 1999-2001 Jeff Garzik
 *	Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository. This was last reconciled with the version that
 * came out with 2.8.
 */

/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/**
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
{
	return vsoc_region_from_inode(file_inode(inode));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};
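
/*
 * Every region described by the shared memory layout gets its own character
 * device minor; the node is created in vsoc_probe_device() from the region's
 * device_name. A minimal, hypothetical userspace sketch of opening a region
 * and querying its geometry (the node name "/dev/hwcomposer" is only an
 * example, the uapi header is assumed to have been copied out of the
 * driver's uapi directory, and error handling is omitted):
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include "vsoc_shm.h"
 *
 *	int fd = open("/dev/hwcomposer", O_RDWR);
 *	struct vsoc_device_region desc;
 *
 *	if (ioctl(fd, VSOC_DESCRIBE_REGION, &desc) == 0)
 *		printf("region [%u, %u), data starts at +%u\n",
 *		       desc.region_begin_offset, desc.region_end_offset,
 *		       desc.offset_of_region_data);
 */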

static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd, */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -ERANGE;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}
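
/*
 * A minimal, hypothetical userspace sketch of granting an fd-scoped
 * permission. mgr_fd must be an fd for the region that manages the region
 * behind managed_fd, owned_value may be anything other than
 * VSOC_REGION_FREE, and the offsets are illustrative only: they have to
 * satisfy the checks above (a page-aligned window inside the managed
 * region's data section and a naturally aligned owner word inside the
 * managing region's data section).
 *
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = 0x1000,
 *			.end_offset = 0x3000,
 *			.owner_offset = 0x100,
 *			.owned_value = 42,
 *		},
 *		.managed_region_fd = managed_fd,
 *	};
 *	int rc = ioctl(mgr_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg);
 *
 * On success the managed fd's read/write/mmap window shrinks to
 * [begin_offset, end_offset) and its file offset is rebased into that
 * window; the permission is released automatically when the managed fd is
 * closed (see vsoc_release()).
 */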

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
		(owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);

		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
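
/*
 * A hypothetical userspace sketch of the futex-like protocol built on
 * VSOC_COND_WAIT/VSOC_COND_WAKE. sync_word_offset is a region-relative,
 * 4-byte-aligned offset chosen by the application and shm is a mapping of
 * the region (see vsoc_mmap() below). The waiter blocks while the word
 * still holds the old value:
 *
 *	struct vsoc_cond_wait wait = {
 *		.offset = sync_word_offset,
 *		.value = OLD_VALUE,
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *	ioctl(region_fd, VSOC_COND_WAIT, &wait);
 *
 * and the waker updates the word through its mapping before waking:
 *
 *	*(volatile uint32_t *)((char *)shm + sync_word_offset) = NEW_VALUE;
 *	ioctl(region_fd, VSOC_COND_WAKE, sync_word_offset);
 *
 * With VSOC_WAIT_IF_EQUAL_TIMEOUT the caller also fills wake_time_sec and
 * wake_time_nsec with an absolute CLOCK_MONOTONIC deadline, and wait.wakes
 * reports how many times the waiter woke up before returning.
 */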

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
				((struct vsoc_private_data *)filp->private_data)->
				fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;
	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
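
/*
 * A small, hypothetical sketch of the doorbell ioctls from userspace. A
 * guest process typically publishes data in the shared window, rings the
 * host, and then blocks until the host signals the same region back:
 *
 *	ioctl(region_fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST);
 *	ioctl(region_fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT);
 *
 * VSOC_MAYBE_SEND_INTERRUPT_TO_HOST only writes the doorbell register when
 * the region's outgoing_signalled flag was still clear (otherwise it returns
 * -EBUSY), while VSOC_SEND_INTERRUPT_TO_HOST rings unconditionally.
 * VSOC_SELF_INTERRUPT lets a guest process wake a local waiter without
 * involving the host at all.
 */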

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}
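
/*
 * read(), write() and lseek() on a region fd operate on the window reported
 * by vsoc_get_area() below: the whole region by default, or the fd-scoped
 * permission window if one is attached to the fd. Offsets are relative to
 * the start of that window, not to the start of the shared memory BAR. A
 * hypothetical userspace sketch (the offset 64 is purely illustrative):
 *
 *	char buf[16];
 *
 *	pread(region_fd, buf, sizeof(buf), 64);
 *	pwrite(region_fd, buf, sizeof(buf), 64);
 *	off_t window_size = lseek(region_fd, 0, SEEK_END);
 */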

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (IS_ERR(device_create(vsoc_dev.class, NULL,
					 MKDEV(vsoc_dev.major, i),
					 NULL,
					 vsoc_dev.regions_data[i].name))) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}

/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission_node *node;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	node = ((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node;
	if (node) {
		struct fd_scoped_permission *perm = &node->permission;

		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}
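
/*
 * A hypothetical userspace sketch of mapping a region (or the fd-scoped
 * permission window attached to the fd) and touching the shared memory
 * directly. The length would normally come from VSOC_DESCRIBE_REGION or
 * from the permission granted on the fd:
 *
 *	uint32_t *shm = mmap(NULL, window_size, PROT_READ | PROT_WRITE,
 *			     MAP_SHARED, region_fd, 0);
 *
 *	if (shm != MAP_FAILED)
 *		shm[0] = 1;
 *
 * The mapping is made non-cached on the guest side, and vm_pgoff is
 * interpreted relative to the start of the window, just like the file
 * offset used by read() and write().
 */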

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
MODULE_VERSION("1.0");