at v5.3-rc5 1151 lines 33 kB view raw
// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/android/staging/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *         Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *	   Copyright 1999-2001 Jeff Garzik
 *	   Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

/* Name used for the character device region and the device class. */
#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository. This was last reconciled with the version that
 * came out with 2.8
 */

/*
 * Register offsets of the KVM Inter-VM shared memory device. They are
 * added to the kernel mapping of REGISTER_BAR (vsoc_dev.regs).
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;  /* Equal to 0 */
/* Upper bound on how much of the register BAR is mapped at probe time. */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

/* Per-region driver state; one entry per region described by the layout. */
struct vsoc_region_data {
	/*
	 * NUL-terminated copy of the region's device_name. Used as the name
	 * of both the region's IRQ and its device node.
	 */
	char name[VSOC_DEVICE_NAME_SZ + 1];
	/* Woken by the interrupt handler and by VSOC_SELF_INTERRUPT. */
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	/* Set once request_irq() succeeded for this region. */
	bool irq_requested;
	/* Set once device_create() succeeded for this region. */
	bool device_created;
};

/* State of the single VSoC device handled by this driver. */
struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

/* The one and only device instance; all entry points operate on it. */
static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

/* A granted permission, linked into vsoc_dev.permissions. */
struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

/* Per-open-file state, allocated in vsoc_open() and freed in vsoc_release(). */
struct vsoc_private_data {
	/* NULL until a permission has been attached to this fd. */
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/**
 * Validate arguments on entry points to the driver.
163 */ 164inline int vsoc_validate_inode(struct inode *inode) 165{ 166 if (iminor(inode) >= vsoc_dev.layout->region_count) { 167 dev_err(&vsoc_dev.dev->dev, 168 "describe_region: invalid region %d\n", iminor(inode)); 169 return -ENODEV; 170 } 171 return 0; 172} 173 174inline int vsoc_validate_filep(struct file *filp) 175{ 176 int ret = vsoc_validate_inode(file_inode(filp)); 177 178 if (ret) 179 return ret; 180 if (!filp->private_data) { 181 dev_err(&vsoc_dev.dev->dev, 182 "No private data on fd, region %d\n", 183 iminor(file_inode(filp))); 184 return -EBADFD; 185 } 186 return 0; 187} 188 189/* Converts from shared memory offset to virtual address */ 190static inline void *shm_off_to_virtual_addr(__u32 offset) 191{ 192 return (void __force *)vsoc_dev.kernel_mapped_shm + offset; 193} 194 195/* Converts from shared memory offset to physical address */ 196static inline phys_addr_t shm_off_to_phys_addr(__u32 offset) 197{ 198 return vsoc_dev.shm_phys_start + offset; 199} 200 201/** 202 * Convenience functions to obtain the region from the inode or file. 203 * Dangerous to call before validating the inode/file. 
204 */ 205static 206inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode) 207{ 208 return &vsoc_dev.regions[iminor(inode)]; 209} 210 211static 212inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode) 213{ 214 return vsoc_region_from_inode(file_inode(inode)); 215} 216 217static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r) 218{ 219 return r->region_end_offset - r->region_begin_offset; 220} 221 222static const struct file_operations vsoc_ops = { 223 .owner = THIS_MODULE, 224 .open = vsoc_open, 225 .mmap = vsoc_mmap, 226 .read = vsoc_read, 227 .unlocked_ioctl = vsoc_ioctl, 228 .compat_ioctl = vsoc_ioctl, 229 .write = vsoc_write, 230 .llseek = vsoc_lseek, 231 .release = vsoc_release, 232}; 233 234static struct pci_device_id vsoc_id_table[] = { 235 {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, 236 {0}, 237}; 238 239MODULE_DEVICE_TABLE(pci, vsoc_id_table); 240 241static void vsoc_remove_device(struct pci_dev *pdev); 242static int vsoc_probe_device(struct pci_dev *pdev, 243 const struct pci_device_id *ent); 244 245static struct pci_driver vsoc_pci_driver = { 246 .name = "vsoc", 247 .id_table = vsoc_id_table, 248 .probe = vsoc_probe_device, 249 .remove = vsoc_remove_device, 250}; 251 252static int 253do_create_fd_scoped_permission(struct vsoc_device_region *region_p, 254 struct fd_scoped_permission_node *np, 255 struct fd_scoped_permission_arg __user *arg) 256{ 257 struct file *managed_filp; 258 s32 managed_fd; 259 atomic_t *owner_ptr = NULL; 260 struct vsoc_device_region *managed_region_p; 261 262 if (copy_from_user(&np->permission, 263 &arg->perm, sizeof(np->permission)) || 264 copy_from_user(&managed_fd, 265 &arg->managed_region_fd, sizeof(managed_fd))) { 266 return -EFAULT; 267 } 268 managed_filp = fdget(managed_fd).file; 269 /* Check that it's a valid fd, */ 270 if (!managed_filp || vsoc_validate_filep(managed_filp)) 271 return -EPERM; 272 /* EEXIST if the given fd already has a permission. 
*/ 273 if (((struct vsoc_private_data *)managed_filp->private_data)-> 274 fd_scoped_permission_node) 275 return -EEXIST; 276 managed_region_p = vsoc_region_from_filep(managed_filp); 277 /* Check that the provided region is managed by this one */ 278 if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) 279 return -EPERM; 280 /* The area must be well formed and have non-zero size */ 281 if (np->permission.begin_offset >= np->permission.end_offset) 282 return -EINVAL; 283 /* The area must fit in the memory window */ 284 if (np->permission.end_offset > 285 vsoc_device_region_size(managed_region_p)) 286 return -ERANGE; 287 /* The area must be in the region data section */ 288 if (np->permission.begin_offset < 289 managed_region_p->offset_of_region_data) 290 return -ERANGE; 291 /* The area must be page aligned */ 292 if (!PAGE_ALIGNED(np->permission.begin_offset) || 293 !PAGE_ALIGNED(np->permission.end_offset)) 294 return -EINVAL; 295 /* Owner offset must be naturally aligned in the window */ 296 if (np->permission.owner_offset & 297 (sizeof(np->permission.owner_offset) - 1)) 298 return -EINVAL; 299 /* The owner flag must reside in the owner memory */ 300 if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > 301 vsoc_device_region_size(region_p)) 302 return -ERANGE; 303 /* The owner flag must reside in the data section */ 304 if (np->permission.owner_offset < region_p->offset_of_region_data) 305 return -EINVAL; 306 /* The owner value must change to claim the memory */ 307 if (np->permission.owned_value == VSOC_REGION_FREE) 308 return -EINVAL; 309 owner_ptr = 310 (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + 311 np->permission.owner_offset); 312 /* We've already verified that this is in the shared memory window, so 313 * it should be safe to write to this address. 
314 */ 315 if (atomic_cmpxchg(owner_ptr, 316 VSOC_REGION_FREE, 317 np->permission.owned_value) != VSOC_REGION_FREE) { 318 return -EBUSY; 319 } 320 ((struct vsoc_private_data *)managed_filp->private_data)-> 321 fd_scoped_permission_node = np; 322 /* The file offset needs to be adjusted if the calling 323 * process did any read/write operations on the fd 324 * before creating the permission. 325 */ 326 if (managed_filp->f_pos) { 327 if (managed_filp->f_pos > np->permission.end_offset) { 328 /* If the offset is beyond the permission end, set it 329 * to the end. 330 */ 331 managed_filp->f_pos = np->permission.end_offset; 332 } else { 333 /* If the offset is within the permission interval 334 * keep it there otherwise reset it to zero. 335 */ 336 if (managed_filp->f_pos < np->permission.begin_offset) { 337 managed_filp->f_pos = 0; 338 } else { 339 managed_filp->f_pos -= 340 np->permission.begin_offset; 341 } 342 } 343 } 344 return 0; 345} 346 347static void 348do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p, 349 struct fd_scoped_permission_node *node) 350{ 351 if (node) { 352 do_destroy_fd_scoped_permission(owner_region_p, 353 &node->permission); 354 mutex_lock(&vsoc_dev.mtx); 355 list_del(&node->list); 356 mutex_unlock(&vsoc_dev.mtx); 357 kfree(node); 358 } 359} 360 361static void 362do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p, 363 struct fd_scoped_permission *perm) 364{ 365 atomic_t *owner_ptr = NULL; 366 int prev = 0; 367 368 if (!perm) 369 return; 370 owner_ptr = (atomic_t *)shm_off_to_virtual_addr 371 (owner_region_p->region_begin_offset + perm->owner_offset); 372 prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE); 373 if (prev != perm->owned_value) 374 dev_err(&vsoc_dev.dev->dev, 375 "%x-%x: owner (%s) %x: expected to be %x was %x", 376 perm->begin_offset, perm->end_offset, 377 owner_region_p->device_name, perm->owner_offset, 378 perm->owned_value, prev); 379} 380 381static long 
do_vsoc_describe_region(struct file *filp, 382 struct vsoc_device_region __user *dest) 383{ 384 struct vsoc_device_region *region_p; 385 int retval = vsoc_validate_filep(filp); 386 387 if (retval) 388 return retval; 389 region_p = vsoc_region_from_filep(filp); 390 if (copy_to_user(dest, region_p, sizeof(*region_p))) 391 return -EFAULT; 392 return 0; 393} 394 395/** 396 * Implements the inner logic of cond_wait. Copies to and from userspace are 397 * done in the helper function below. 398 */ 399static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) 400{ 401 DEFINE_WAIT(wait); 402 u32 region_number = iminor(file_inode(filp)); 403 struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; 404 struct hrtimer_sleeper timeout, *to = NULL; 405 int ret = 0; 406 struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); 407 atomic_t *address = NULL; 408 ktime_t wake_time; 409 410 /* Ensure that the offset is aligned */ 411 if (arg->offset & (sizeof(uint32_t) - 1)) 412 return -EADDRNOTAVAIL; 413 /* Ensure that the offset is within shared memory */ 414 if (((uint64_t)arg->offset) + region_p->region_begin_offset + 415 sizeof(uint32_t) > region_p->region_end_offset) 416 return -E2BIG; 417 address = shm_off_to_virtual_addr(region_p->region_begin_offset + 418 arg->offset); 419 420 /* Ensure that the type of wait is valid */ 421 switch (arg->wait_type) { 422 case VSOC_WAIT_IF_EQUAL: 423 break; 424 case VSOC_WAIT_IF_EQUAL_TIMEOUT: 425 to = &timeout; 426 break; 427 default: 428 return -EINVAL; 429 } 430 431 if (to) { 432 /* Copy the user-supplied timesec into the kernel structure. 433 * We do things this way to flatten differences between 32 bit 434 * and 64 bit timespecs. 
435 */ 436 if (arg->wake_time_nsec >= NSEC_PER_SEC) 437 return -EINVAL; 438 wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec); 439 440 hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, 441 HRTIMER_MODE_ABS); 442 hrtimer_set_expires_range_ns(&to->timer, wake_time, 443 current->timer_slack_ns); 444 445 hrtimer_init_sleeper(to, current); 446 } 447 448 while (1) { 449 prepare_to_wait(&data->futex_wait_queue, &wait, 450 TASK_INTERRUPTIBLE); 451 /* 452 * Check the sentinel value after prepare_to_wait. If the value 453 * changes after this check the writer will call signal, 454 * changing the task state from INTERRUPTIBLE to RUNNING. That 455 * will ensure that schedule() will eventually schedule this 456 * task. 457 */ 458 if (atomic_read(address) != arg->value) { 459 ret = 0; 460 break; 461 } 462 if (to) { 463 hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); 464 if (likely(to->task)) 465 freezable_schedule(); 466 hrtimer_cancel(&to->timer); 467 if (!to->task) { 468 ret = -ETIMEDOUT; 469 break; 470 } 471 } else { 472 freezable_schedule(); 473 } 474 /* Count the number of times that we woke up. This is useful 475 * for unit testing. 476 */ 477 ++arg->wakes; 478 if (signal_pending(current)) { 479 ret = -EINTR; 480 break; 481 } 482 } 483 finish_wait(&data->futex_wait_queue, &wait); 484 if (to) 485 destroy_hrtimer_on_stack(&to->timer); 486 return ret; 487} 488 489/** 490 * Handles the details of copying from/to userspace to ensure that the copies 491 * happen on all of the return paths of cond_wait. 492 */ 493static int do_vsoc_cond_wait(struct file *filp, 494 struct vsoc_cond_wait __user *untrusted_in) 495{ 496 struct vsoc_cond_wait arg; 497 int rval = 0; 498 499 if (copy_from_user(&arg, untrusted_in, sizeof(arg))) 500 return -EFAULT; 501 /* wakes is an out parameter. Initialize it to something sensible. 
*/ 502 arg.wakes = 0; 503 rval = handle_vsoc_cond_wait(filp, &arg); 504 if (copy_to_user(untrusted_in, &arg, sizeof(arg))) 505 return -EFAULT; 506 return rval; 507} 508 509static int do_vsoc_cond_wake(struct file *filp, uint32_t offset) 510{ 511 struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); 512 u32 region_number = iminor(file_inode(filp)); 513 struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; 514 /* Ensure that the offset is aligned */ 515 if (offset & (sizeof(uint32_t) - 1)) 516 return -EADDRNOTAVAIL; 517 /* Ensure that the offset is within shared memory */ 518 if (((uint64_t)offset) + region_p->region_begin_offset + 519 sizeof(uint32_t) > region_p->region_end_offset) 520 return -E2BIG; 521 /* 522 * TODO(b/73664181): Use multiple futex wait queues. 523 * We need to wake every sleeper when the condition changes. Typically 524 * only a single thread will be waiting on the condition, but there 525 * are exceptions. The worst case is about 10 threads. 
526 */ 527 wake_up_interruptible_all(&data->futex_wait_queue); 528 return 0; 529} 530 531static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 532{ 533 int rv = 0; 534 struct vsoc_device_region *region_p; 535 u32 reg_num; 536 struct vsoc_region_data *reg_data; 537 int retval = vsoc_validate_filep(filp); 538 539 if (retval) 540 return retval; 541 region_p = vsoc_region_from_filep(filp); 542 reg_num = iminor(file_inode(filp)); 543 reg_data = vsoc_dev.regions_data + reg_num; 544 switch (cmd) { 545 case VSOC_CREATE_FD_SCOPED_PERMISSION: 546 { 547 struct fd_scoped_permission_node *node = NULL; 548 549 node = kzalloc(sizeof(*node), GFP_KERNEL); 550 /* We can't allocate memory for the permission */ 551 if (!node) 552 return -ENOMEM; 553 INIT_LIST_HEAD(&node->list); 554 rv = do_create_fd_scoped_permission 555 (region_p, 556 node, 557 (struct fd_scoped_permission_arg __user *)arg); 558 if (!rv) { 559 mutex_lock(&vsoc_dev.mtx); 560 list_add(&node->list, &vsoc_dev.permissions); 561 mutex_unlock(&vsoc_dev.mtx); 562 } else { 563 kfree(node); 564 return rv; 565 } 566 } 567 break; 568 569 case VSOC_GET_FD_SCOPED_PERMISSION: 570 { 571 struct fd_scoped_permission_node *node = 572 ((struct vsoc_private_data *)filp->private_data)-> 573 fd_scoped_permission_node; 574 if (!node) 575 return -ENOENT; 576 if (copy_to_user 577 ((struct fd_scoped_permission __user *)arg, 578 &node->permission, sizeof(node->permission))) 579 return -EFAULT; 580 } 581 break; 582 583 case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST: 584 if (!atomic_xchg(reg_data->outgoing_signalled, 1)) { 585 writel(reg_num, vsoc_dev.regs + DOORBELL); 586 return 0; 587 } else { 588 return -EBUSY; 589 } 590 break; 591 592 case VSOC_SEND_INTERRUPT_TO_HOST: 593 writel(reg_num, vsoc_dev.regs + DOORBELL); 594 return 0; 595 case VSOC_WAIT_FOR_INCOMING_INTERRUPT: 596 wait_event_interruptible 597 (reg_data->interrupt_wait_queue, 598 (atomic_read(reg_data->incoming_signalled) != 0)); 599 break; 600 601 case 
VSOC_DESCRIBE_REGION: 602 return do_vsoc_describe_region 603 (filp, 604 (struct vsoc_device_region __user *)arg); 605 606 case VSOC_SELF_INTERRUPT: 607 atomic_set(reg_data->incoming_signalled, 1); 608 wake_up_interruptible(&reg_data->interrupt_wait_queue); 609 break; 610 611 case VSOC_COND_WAIT: 612 return do_vsoc_cond_wait(filp, 613 (struct vsoc_cond_wait __user *)arg); 614 case VSOC_COND_WAKE: 615 return do_vsoc_cond_wake(filp, arg); 616 617 default: 618 return -EINVAL; 619 } 620 return 0; 621} 622 623static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len, 624 loff_t *poffset) 625{ 626 __u32 area_off; 627 const void *area_p; 628 ssize_t area_len; 629 int retval = vsoc_validate_filep(filp); 630 631 if (retval) 632 return retval; 633 area_len = vsoc_get_area(filp, &area_off); 634 area_p = shm_off_to_virtual_addr(area_off); 635 area_p += *poffset; 636 area_len -= *poffset; 637 if (area_len <= 0) 638 return 0; 639 if (area_len < len) 640 len = area_len; 641 if (copy_to_user(buffer, area_p, len)) 642 return -EFAULT; 643 *poffset += len; 644 return len; 645} 646 647static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) 648{ 649 ssize_t area_len = 0; 650 int retval = vsoc_validate_filep(filp); 651 652 if (retval) 653 return retval; 654 area_len = vsoc_get_area(filp, NULL); 655 switch (origin) { 656 case SEEK_SET: 657 break; 658 659 case SEEK_CUR: 660 if (offset > 0 && offset + filp->f_pos < 0) 661 return -EOVERFLOW; 662 offset += filp->f_pos; 663 break; 664 665 case SEEK_END: 666 if (offset > 0 && offset + area_len < 0) 667 return -EOVERFLOW; 668 offset += area_len; 669 break; 670 671 case SEEK_DATA: 672 if (offset >= area_len) 673 return -EINVAL; 674 if (offset < 0) 675 offset = 0; 676 break; 677 678 case SEEK_HOLE: 679 /* Next hole is always the end of the region, unless offset is 680 * beyond that 681 */ 682 if (offset < area_len) 683 offset = area_len; 684 break; 685 686 default: 687 return -EINVAL; 688 } 689 690 if (offset < 0 
|| offset > area_len) 691 return -EINVAL; 692 filp->f_pos = offset; 693 694 return offset; 695} 696 697static ssize_t vsoc_write(struct file *filp, const char __user *buffer, 698 size_t len, loff_t *poffset) 699{ 700 __u32 area_off; 701 void *area_p; 702 ssize_t area_len; 703 int retval = vsoc_validate_filep(filp); 704 705 if (retval) 706 return retval; 707 area_len = vsoc_get_area(filp, &area_off); 708 area_p = shm_off_to_virtual_addr(area_off); 709 area_p += *poffset; 710 area_len -= *poffset; 711 if (area_len <= 0) 712 return 0; 713 if (area_len < len) 714 len = area_len; 715 if (copy_from_user(area_p, buffer, len)) 716 return -EFAULT; 717 *poffset += len; 718 return len; 719} 720 721static irqreturn_t vsoc_interrupt(int irq, void *region_data_v) 722{ 723 struct vsoc_region_data *region_data = 724 (struct vsoc_region_data *)region_data_v; 725 int reg_num = region_data - vsoc_dev.regions_data; 726 727 if (unlikely(!region_data)) 728 return IRQ_NONE; 729 730 if (unlikely(reg_num < 0 || 731 reg_num >= vsoc_dev.layout->region_count)) { 732 dev_err(&vsoc_dev.dev->dev, 733 "invalid irq @%p reg_num=0x%04x\n", 734 region_data, reg_num); 735 return IRQ_NONE; 736 } 737 if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) { 738 dev_err(&vsoc_dev.dev->dev, 739 "irq not aligned @%p reg_num=0x%04x\n", 740 region_data, reg_num); 741 return IRQ_NONE; 742 } 743 wake_up_interruptible(&region_data->interrupt_wait_queue); 744 return IRQ_HANDLED; 745} 746 747static int vsoc_probe_device(struct pci_dev *pdev, 748 const struct pci_device_id *ent) 749{ 750 int result; 751 int i; 752 resource_size_t reg_size; 753 dev_t devt; 754 755 vsoc_dev.dev = pdev; 756 result = pci_enable_device(pdev); 757 if (result) { 758 dev_err(&pdev->dev, 759 "pci_enable_device failed %s: error %d\n", 760 pci_name(pdev), result); 761 return result; 762 } 763 vsoc_dev.enabled_device = true; 764 result = pci_request_regions(pdev, "vsoc"); 765 if (result < 0) { 766 dev_err(&pdev->dev, 
"pci_request_regions failed\n"); 767 vsoc_remove_device(pdev); 768 return -EBUSY; 769 } 770 vsoc_dev.requested_regions = true; 771 /* Set up the control registers in BAR 0 */ 772 reg_size = pci_resource_len(pdev, REGISTER_BAR); 773 if (reg_size > MAX_REGISTER_BAR_LEN) 774 vsoc_dev.regs = 775 pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); 776 else 777 vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); 778 779 if (!vsoc_dev.regs) { 780 dev_err(&pdev->dev, 781 "cannot map registers of size %zu\n", 782 (size_t)reg_size); 783 vsoc_remove_device(pdev); 784 return -EBUSY; 785 } 786 787 /* Map the shared memory in BAR 2 */ 788 vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); 789 vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); 790 791 dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n", 792 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size); 793 vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); 794 if (!vsoc_dev.kernel_mapped_shm) { 795 dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); 796 vsoc_remove_device(pdev); 797 return -EBUSY; 798 } 799 800 vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *) 801 vsoc_dev.kernel_mapped_shm; 802 dev_info(&pdev->dev, "major_version: %d\n", 803 vsoc_dev.layout->major_version); 804 dev_info(&pdev->dev, "minor_version: %d\n", 805 vsoc_dev.layout->minor_version); 806 dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); 807 dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); 808 if (vsoc_dev.layout->major_version != 809 CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { 810 dev_err(&vsoc_dev.dev->dev, 811 "driver supports only major_version %d\n", 812 CURRENT_VSOC_LAYOUT_MAJOR_VERSION); 813 vsoc_remove_device(pdev); 814 return -EBUSY; 815 } 816 result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, 817 VSOC_DEV_NAME); 818 if (result) { 819 dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); 820 vsoc_remove_device(pdev); 
821 return -EBUSY; 822 } 823 vsoc_dev.major = MAJOR(devt); 824 cdev_init(&vsoc_dev.cdev, &vsoc_ops); 825 vsoc_dev.cdev.owner = THIS_MODULE; 826 result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); 827 if (result) { 828 dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); 829 vsoc_remove_device(pdev); 830 return -EBUSY; 831 } 832 vsoc_dev.cdev_added = true; 833 vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); 834 if (IS_ERR(vsoc_dev.class)) { 835 dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); 836 vsoc_remove_device(pdev); 837 return PTR_ERR(vsoc_dev.class); 838 } 839 vsoc_dev.class_added = true; 840 vsoc_dev.regions = (struct vsoc_device_region __force *) 841 ((void *)vsoc_dev.layout + 842 vsoc_dev.layout->vsoc_region_desc_offset); 843 vsoc_dev.msix_entries = 844 kcalloc(vsoc_dev.layout->region_count, 845 sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); 846 if (!vsoc_dev.msix_entries) { 847 dev_err(&vsoc_dev.dev->dev, 848 "unable to allocate msix_entries\n"); 849 vsoc_remove_device(pdev); 850 return -ENOSPC; 851 } 852 vsoc_dev.regions_data = 853 kcalloc(vsoc_dev.layout->region_count, 854 sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); 855 if (!vsoc_dev.regions_data) { 856 dev_err(&vsoc_dev.dev->dev, 857 "unable to allocate regions' data\n"); 858 vsoc_remove_device(pdev); 859 return -ENOSPC; 860 } 861 for (i = 0; i < vsoc_dev.layout->region_count; ++i) 862 vsoc_dev.msix_entries[i].entry = i; 863 864 result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, 865 vsoc_dev.layout->region_count); 866 if (result) { 867 dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); 868 vsoc_remove_device(pdev); 869 return -ENOSPC; 870 } 871 /* Check that all regions are well formed */ 872 for (i = 0; i < vsoc_dev.layout->region_count; ++i) { 873 const struct vsoc_device_region *region = vsoc_dev.regions + i; 874 875 if (!PAGE_ALIGNED(region->region_begin_offset) || 876 !PAGE_ALIGNED(region->region_end_offset)) { 877 
dev_err(&vsoc_dev.dev->dev, 878 "region %d not aligned (%x:%x)", i, 879 region->region_begin_offset, 880 region->region_end_offset); 881 vsoc_remove_device(pdev); 882 return -EFAULT; 883 } 884 if (region->region_begin_offset >= region->region_end_offset || 885 region->region_end_offset > vsoc_dev.shm_size) { 886 dev_err(&vsoc_dev.dev->dev, 887 "region %d offsets are wrong: %x %x %zx", 888 i, region->region_begin_offset, 889 region->region_end_offset, vsoc_dev.shm_size); 890 vsoc_remove_device(pdev); 891 return -EFAULT; 892 } 893 if (region->managed_by >= vsoc_dev.layout->region_count) { 894 dev_err(&vsoc_dev.dev->dev, 895 "region %d has invalid owner: %u", 896 i, region->managed_by); 897 vsoc_remove_device(pdev); 898 return -EFAULT; 899 } 900 } 901 vsoc_dev.msix_enabled = true; 902 for (i = 0; i < vsoc_dev.layout->region_count; ++i) { 903 const struct vsoc_device_region *region = vsoc_dev.regions + i; 904 size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; 905 const struct vsoc_signal_table_layout *h_to_g_signal_table = 906 &region->host_to_guest_signal_table; 907 const struct vsoc_signal_table_layout *g_to_h_signal_table = 908 &region->guest_to_host_signal_table; 909 910 vsoc_dev.regions_data[i].name[name_sz] = '\0'; 911 memcpy(vsoc_dev.regions_data[i].name, region->device_name, 912 name_sz); 913 dev_info(&pdev->dev, "region %d name=%s\n", 914 i, vsoc_dev.regions_data[i].name); 915 init_waitqueue_head 916 (&vsoc_dev.regions_data[i].interrupt_wait_queue); 917 init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); 918 vsoc_dev.regions_data[i].incoming_signalled = 919 shm_off_to_virtual_addr(region->region_begin_offset) + 920 h_to_g_signal_table->interrupt_signalled_offset; 921 vsoc_dev.regions_data[i].outgoing_signalled = 922 shm_off_to_virtual_addr(region->region_begin_offset) + 923 g_to_h_signal_table->interrupt_signalled_offset; 924 result = request_irq(vsoc_dev.msix_entries[i].vector, 925 vsoc_interrupt, 0, 926 vsoc_dev.regions_data[i].name, 
927 vsoc_dev.regions_data + i); 928 if (result) { 929 dev_info(&pdev->dev, 930 "request_irq failed irq=%d vector=%d\n", 931 i, vsoc_dev.msix_entries[i].vector); 932 vsoc_remove_device(pdev); 933 return -ENOSPC; 934 } 935 vsoc_dev.regions_data[i].irq_requested = true; 936 if (!device_create(vsoc_dev.class, NULL, 937 MKDEV(vsoc_dev.major, i), 938 NULL, vsoc_dev.regions_data[i].name)) { 939 dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); 940 vsoc_remove_device(pdev); 941 return -EBUSY; 942 } 943 vsoc_dev.regions_data[i].device_created = true; 944 } 945 return 0; 946} 947 948/* 949 * This should undo all of the allocations in the probe function in reverse 950 * order. 951 * 952 * Notes: 953 * 954 * The device may have been partially initialized, so double check 955 * that the allocations happened. 956 * 957 * This function may be called multiple times, so mark resources as freed 958 * as they are deallocated. 959 */ 960static void vsoc_remove_device(struct pci_dev *pdev) 961{ 962 int i; 963 /* 964 * pdev is the first thing to be set on probe and the last thing 965 * to be cleared here. If it's NULL then there is no cleanup. 
966 */ 967 if (!pdev || !vsoc_dev.dev) 968 return; 969 dev_info(&pdev->dev, "remove_device\n"); 970 if (vsoc_dev.regions_data) { 971 for (i = 0; i < vsoc_dev.layout->region_count; ++i) { 972 if (vsoc_dev.regions_data[i].device_created) { 973 device_destroy(vsoc_dev.class, 974 MKDEV(vsoc_dev.major, i)); 975 vsoc_dev.regions_data[i].device_created = false; 976 } 977 if (vsoc_dev.regions_data[i].irq_requested) 978 free_irq(vsoc_dev.msix_entries[i].vector, NULL); 979 vsoc_dev.regions_data[i].irq_requested = false; 980 } 981 kfree(vsoc_dev.regions_data); 982 vsoc_dev.regions_data = NULL; 983 } 984 if (vsoc_dev.msix_enabled) { 985 pci_disable_msix(pdev); 986 vsoc_dev.msix_enabled = false; 987 } 988 kfree(vsoc_dev.msix_entries); 989 vsoc_dev.msix_entries = NULL; 990 vsoc_dev.regions = NULL; 991 if (vsoc_dev.class_added) { 992 class_destroy(vsoc_dev.class); 993 vsoc_dev.class_added = false; 994 } 995 if (vsoc_dev.cdev_added) { 996 cdev_del(&vsoc_dev.cdev); 997 vsoc_dev.cdev_added = false; 998 } 999 if (vsoc_dev.major && vsoc_dev.layout) { 1000 unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), 1001 vsoc_dev.layout->region_count); 1002 vsoc_dev.major = 0; 1003 } 1004 vsoc_dev.layout = NULL; 1005 if (vsoc_dev.kernel_mapped_shm) { 1006 pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); 1007 vsoc_dev.kernel_mapped_shm = NULL; 1008 } 1009 if (vsoc_dev.regs) { 1010 pci_iounmap(pdev, vsoc_dev.regs); 1011 vsoc_dev.regs = NULL; 1012 } 1013 if (vsoc_dev.requested_regions) { 1014 pci_release_regions(pdev); 1015 vsoc_dev.requested_regions = false; 1016 } 1017 if (vsoc_dev.enabled_device) { 1018 pci_disable_device(pdev); 1019 vsoc_dev.enabled_device = false; 1020 } 1021 /* Do this last: it indicates that the device is not initialized. 
*/ 1022 vsoc_dev.dev = NULL; 1023} 1024 1025static void __exit vsoc_cleanup_module(void) 1026{ 1027 vsoc_remove_device(vsoc_dev.dev); 1028 pci_unregister_driver(&vsoc_pci_driver); 1029} 1030 1031static int __init vsoc_init_module(void) 1032{ 1033 int err = -ENOMEM; 1034 1035 INIT_LIST_HEAD(&vsoc_dev.permissions); 1036 mutex_init(&vsoc_dev.mtx); 1037 1038 err = pci_register_driver(&vsoc_pci_driver); 1039 if (err < 0) 1040 return err; 1041 return 0; 1042} 1043 1044static int vsoc_open(struct inode *inode, struct file *filp) 1045{ 1046 /* Can't use vsoc_validate_filep because filp is still incomplete */ 1047 int ret = vsoc_validate_inode(inode); 1048 1049 if (ret) 1050 return ret; 1051 filp->private_data = 1052 kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL); 1053 if (!filp->private_data) 1054 return -ENOMEM; 1055 return 0; 1056} 1057 1058static int vsoc_release(struct inode *inode, struct file *filp) 1059{ 1060 struct vsoc_private_data *private_data = NULL; 1061 struct fd_scoped_permission_node *node = NULL; 1062 struct vsoc_device_region *owner_region_p = NULL; 1063 int retval = vsoc_validate_filep(filp); 1064 1065 if (retval) 1066 return retval; 1067 private_data = (struct vsoc_private_data *)filp->private_data; 1068 if (!private_data) 1069 return 0; 1070 1071 node = private_data->fd_scoped_permission_node; 1072 if (node) { 1073 owner_region_p = vsoc_region_from_inode(inode); 1074 if (owner_region_p->managed_by != VSOC_REGION_WHOLE) { 1075 owner_region_p = 1076 &vsoc_dev.regions[owner_region_p->managed_by]; 1077 } 1078 do_destroy_fd_scoped_permission_node(owner_region_p, node); 1079 private_data->fd_scoped_permission_node = NULL; 1080 } 1081 kfree(private_data); 1082 filp->private_data = NULL; 1083 1084 return 0; 1085} 1086 1087/* 1088 * Returns the device relative offset and length of the area specified by the 1089 * fd scoped permission. 
If there is no fd scoped permission set, a default 1090 * permission covering the entire region is assumed, unless the region is owned 1091 * by another one, in which case the default is a permission with zero size. 1092 */ 1093static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset) 1094{ 1095 __u32 off = 0; 1096 ssize_t length = 0; 1097 struct vsoc_device_region *region_p; 1098 struct fd_scoped_permission *perm; 1099 1100 region_p = vsoc_region_from_filep(filp); 1101 off = region_p->region_begin_offset; 1102 perm = &((struct vsoc_private_data *)filp->private_data)-> 1103 fd_scoped_permission_node->permission; 1104 if (perm) { 1105 off += perm->begin_offset; 1106 length = perm->end_offset - perm->begin_offset; 1107 } else if (region_p->managed_by == VSOC_REGION_WHOLE) { 1108 /* No permission set and the regions is not owned by another, 1109 * default to full region access. 1110 */ 1111 length = vsoc_device_region_size(region_p); 1112 } else { 1113 /* return zero length, access is denied. 
*/ 1114 length = 0; 1115 } 1116 if (area_offset) 1117 *area_offset = off; 1118 return length; 1119} 1120 1121static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma) 1122{ 1123 unsigned long len = vma->vm_end - vma->vm_start; 1124 __u32 area_off; 1125 phys_addr_t mem_off; 1126 ssize_t area_len; 1127 int retval = vsoc_validate_filep(filp); 1128 1129 if (retval) 1130 return retval; 1131 area_len = vsoc_get_area(filp, &area_off); 1132 /* Add the requested offset */ 1133 area_off += (vma->vm_pgoff << PAGE_SHIFT); 1134 area_len -= (vma->vm_pgoff << PAGE_SHIFT); 1135 if (area_len < len) 1136 return -EINVAL; 1137 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1138 mem_off = shm_off_to_phys_addr(area_off); 1139 if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT, 1140 len, vma->vm_page_prot)) 1141 return -EAGAIN; 1142 return 0; 1143} 1144 1145module_init(vsoc_init_module); 1146module_exit(vsoc_cleanup_module); 1147 1148MODULE_LICENSE("GPL"); 1149MODULE_AUTHOR("Greg Hartman <ghartman@google.com>"); 1150MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device"); 1151MODULE_VERSION("1.0");