// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *   Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository. This was last reconciled with the version that
 * came out with 2.8.
 */

/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

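/*
 * Illustrative sketch, not used by the driver itself: a hypothetical
 * userspace grant of an fd-scoped permission could look roughly like the
 * snippet below, assuming the uapi/vsoc_shm.h structures referenced in
 * this file; manager_fd, managed_fd and the offsets/values are made-up
 * placeholders.
 *
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = begin_off,	// page aligned
 *			.end_offset = end_off,		// page aligned
 *			.owner_offset = owner_off,	// naturally aligned
 *			.owned_value = my_value,	// != VSOC_REGION_FREE
 *		},
 *		.managed_region_fd = managed_fd,
 *	};
 *	if (ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg) == -1)
 *		perror("VSOC_CREATE_FD_SCOPED_PERMISSION");
 *
 * do_create_fd_scoped_permission() below validates exactly these
 * constraints and claims the owner word with an atomic compare-exchange.
 */
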
struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/**
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
{
	return vsoc_region_from_inode(file_inode(inode));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission, &arg->perm,
			   sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd, */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -ERANGE;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
		(owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);

		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
			    ((struct vsoc_private_data *)filp->private_data)->
			    fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;
	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

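/*
 * Illustrative sketch, not used by the driver itself: the interrupt handler
 * above only wakes interrupt_wait_queue; a hypothetical guest process drives
 * the signalling paths through the ioctls handled in vsoc_ioctl(), e.g.
 * (region_fd, word_off and old_val are made-up placeholders):
 *
 *	// Guest-to-host doorbell; -1/EBUSY means the previous interrupt
 *	// has not been consumed by the host yet.
 *	ioctl(region_fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST);
 *
 *	// Block until the host rings this region's doorbell.
 *	ioctl(region_fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT);
 *
 *	// Futex-style wait on a 32-bit word in the region:
 *	// sleep while the word still equals old_val ...
 *	struct vsoc_cond_wait w = {
 *		.offset = word_off,
 *		.value = old_val,
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *	ioctl(region_fd, VSOC_COND_WAIT, &w);
 *	// ... and, from the writer side, wake all sleepers on that word.
 *	ioctl(region_fd, VSOC_COND_WAKE, word_off);
 */
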
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
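	/*
	 * For reference (summarized from the structures used in this file,
	 * not a normative layout description): the shared memory window
	 * starts with a vsoc_shm_layout_descriptor, whose
	 * vsoc_region_desc_offset points at the table of vsoc_device_region
	 * descriptors walked below. Each region is a page-aligned
	 * [region_begin_offset, region_end_offset) slice of the window that
	 * holds its guest_to_host/host_to_guest signal tables and, from
	 * offset_of_region_data onward, the data area that fd-scoped
	 * permissions must fall within.
	 */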
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (!device_create(vsoc_dev.class, NULL,
				   MKDEV(vsoc_dev.major, i),
				   NULL, vsoc_dev.regions_data[i].name)) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
				&vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}

/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission *perm;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	perm = &((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node->permission;
	if (perm) {
		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
MODULE_VERSION("1.0");