// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *   Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEMU. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEMU repository. This was last reconciled with the version that
 * shipped with QEMU 2.8.
 */

/*
 * These are the KVM Inter-VM shared memory device register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};
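
/*
 * A brief sketch of how the two signal flags above are used (see
 * vsoc_ioctl() and vsoc_interrupt() below; the host-side behaviour is
 * inferred, not implemented in this file):
 *
 *	guest -> host:	if (!atomic_xchg(outgoing_signalled, 1))
 *				writel(reg_num, regs + DOORBELL);
 *			else return -EBUSY, a prior signal is pending
 *
 *	host -> guest:	the host presumably sets incoming_signalled and
 *			raises the region's MSI-X vector; vsoc_interrupt()
 *			then wakes sleepers on interrupt_wait_queue.
 */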

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout
	 * structure. This is probably identical to kernel_mapped_shm, but
	 * saving this here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset.
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list. */
	struct mutex mtx;
	/* Major number assigned by the kernel. */
	int major;
	/* Character device assigned by the kernel. */
	struct cdev cdev;
	/* Device class assigned by the kernel. */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/*
 * Validate arguments on entry points to the driver.
 */
static inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

static inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/*
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *filp)
{
	return vsoc_region_from_inode(file_inode(filp));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

static const struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};
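
/*
 * A minimal userspace sketch of granting an fd-scoped permission with
 * the ioctl implemented below. All values are illustrative; the real
 * structures and constraints live in uapi/vsoc_shm.h and in
 * do_create_fd_scoped_permission():
 *
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = 0x1000,	// page aligned, inside the
 *			.end_offset = 0x2000,	//   managed data section
 *			.owner_offset = 0x10,	// word-aligned owner flag
 *			.owned_value = 42,	// anything != VSOC_REGION_FREE
 *		},
 *		.managed_region_fd = managed_fd,
 *	};
 *	ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg);
 *
 * On success the permission is attached to managed_fd and is released
 * automatically in vsoc_release() when that fd is closed.
 */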

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;
	struct fd managed;
	int ret = 0;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	/* Hold a reference on the managed fd for the duration of the call. */
	managed = fdget(managed_fd);
	managed_filp = managed.file;
	/* Check that it's a valid fd. */
	if (!managed_filp || vsoc_validate_filep(managed_filp)) {
		ret = -EPERM;
		goto out;
	}
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node) {
		ret = -EEXIST;
		goto out;
	}
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one. */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) {
		ret = -EPERM;
		goto out;
	}
	/* The area must be well formed and have non-zero size. */
	if (np->permission.begin_offset >= np->permission.end_offset) {
		ret = -EINVAL;
		goto out;
	}
	/* The area must fit in the memory window. */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p)) {
		ret = -ERANGE;
		goto out;
	}
	/* The area must be in the region data section. */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data) {
		ret = -ERANGE;
		goto out;
	}
	/* The area must be page aligned. */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset)) {
		ret = -EINVAL;
		goto out;
	}
	/* The owner offset must be naturally aligned in the window. */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1)) {
		ret = -EINVAL;
		goto out;
	}
	/* The owner flag must reside in the owner memory. */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p)) {
		ret = -ERANGE;
		goto out;
	}
	/* The owner flag must reside in the data section. */
	if (np->permission.owner_offset < region_p->offset_of_region_data) {
		ret = -EINVAL;
		goto out;
	}
	/* The owner value must change to claim the memory. */
	if (np->permission.owned_value == VSOC_REGION_FREE) {
		ret = -EINVAL;
		goto out;
	}
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		ret = -EBUSY;
		goto out;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there; otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset)
				managed_filp->f_pos = 0;
			else
				managed_filp->f_pos -=
				    np->permission.begin_offset;
		}
	}
out:
	/* fdput() is a no-op when fdget() found no file. */
	fdput(managed);
	return ret;
}

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
		(owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/*
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
					      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);
	}
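
	/*
	 * Note on the timeout path below: the hrtimer_sleeper callback wakes
	 * the task and clears to->task when the timer fires, so
	 * "to->task == NULL" after schedule() is the timeout indication.
	 * This mirrors the pattern used by futex_wait() and do_nanosleep().
	 */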

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/*
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
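
/*
 * A minimal userspace sketch pairing the two calls above (values are
 * illustrative; struct vsoc_cond_wait is defined in uapi/vsoc_shm.h):
 *
 *	// Waiter: sleep while the word at 'off' still equals 'old'.
 *	struct vsoc_cond_wait w = {
 *		.offset = off,		// 4-byte aligned, inside the region
 *		.value = old,
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *	ioctl(region_fd, VSOC_COND_WAIT, &w);	// w.wakes counts wakeups
 *
 *	// Waker: store the new value at 'off', then
 *	ioctl(region_fd, VSOC_COND_WAKE, off);
 *
 * VSOC_WAIT_IF_EQUAL_TIMEOUT takes an absolute CLOCK_MONOTONIC deadline
 * in wake_time_sec/wake_time_nsec and fails with -ETIMEDOUT on expiry.
 */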

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
			    ((struct vsoc_private_data *)filp->private_data)->
			    fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		}
		return -EBUSY;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
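
/*
 * A minimal userspace sketch of the interrupt ioctls above (error
 * handling elided; region_fd is an open region device node):
 *
 *	// Guest -> host, at most one outstanding doorbell per region:
 *	if (ioctl(region_fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST) < 0 &&
 *	    errno == EBUSY)
 *		;	// the host has not consumed the previous signal
 *
 *	// Block until the host (or VSOC_SELF_INTERRUPT) signals us:
 *	ioctl(region_fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT);
 */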

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that.
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num;

	if (unlikely(!region_data))
		return IRQ_NONE;

	/* Derive the region number only after the NULL check above. */
	reg_num = region_data - vsoc_dev.regions_data;
	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
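
	/*
	 * Sketch of the shared-memory window as parsed below (offsets other
	 * than zero come from the descriptors themselves):
	 *
	 *   +0                        struct vsoc_shm_layout_descriptor
	 *   +vsoc_region_desc_offset  struct vsoc_device_region[region_count]
	 *   region_begin_offset..region_end_offset
	 *                             per-region data; page aligned and
	 *                             bounded by shm_size (checked below)
	 */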

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix_exact failed: %d\n",
			 result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
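
	/*
	 * Interrupt routing, for reference: msix_entries[i].entry was set to
	 * i above, so after pci_enable_msix_exact() region i's doorbell
	 * arrives on msix_entries[i].vector, and request_irq() below ties
	 * that vector to vsoc_interrupt() with region i's data as dev_id.
	 */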

	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed region=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		/* device_create() returns an ERR_PTR on failure, never NULL */
		if (IS_ERR(device_create(vsoc_dev.class, NULL,
					 MKDEV(vsoc_dev.major, i),
					 NULL, vsoc_dev.regions_data[i].name))) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested) {
				/* Pass the dev_id that request_irq() used,
				 * or the handler will not be found.
				 */
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
				vsoc_dev.regions_data[i].irq_requested = false;
			}
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}
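
/*
 * A minimal userspace sketch of mapping a region (the path is a
 * placeholder; nodes are created per region in vsoc_probe_device()):
 *
 *	int fd = open("/dev/<region name>", O_RDWR);
 *	void *base = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 * The window that mmap(), read() and write() may touch is bounded by
 * vsoc_get_area() below: the fd-scoped permission when one is attached,
 * otherwise the whole region, or nothing if the region is managed by
 * another region.
 */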

/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission_node *node;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	/* Check the node itself before taking the address of its permission:
	 * &node->permission is non-NULL even when node is NULL.
	 */
	node = ((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node;
	if (node) {
		off += node->permission.begin_offset;
		length = node->permission.end_offset -
			 node->permission.begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* Return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	/* Reject negative lengths explicitly: a bare signed/unsigned compare
	 * would convert a negative area_len to a huge value and pass.
	 */
	if (area_len < 0 || (size_t)area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEMU's ivshmem device");
MODULE_VERSION("1.0");