// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 * Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 * Copyright 1999-2001 Jeff Garzik
 * Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository. This was last reconciled with the version that
 * came out with 2.8.
 */

/*
 * These are the KVM Inter-VM shared memory device register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};
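
/*
 * Interrupt routing, as set up in vsoc_probe_device() and used in
 * vsoc_ioctl() below: each region is paired with one MSI-X vector, so
 * the region index doubles as the interrupt index. The guest signals
 * the host by writing the region number to the DOORBELL register; the
 * host signals the guest by raising the region's MSI-X vector.
 */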

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/**
 * Validate arguments on entry points to the driver.
 */
static inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

static inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}
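
/*
 * One character device is created per region, so a file's minor number
 * doubles as its region index; the helpers below rely on that
 * equivalence via iminor().
 */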

/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
{
	return vsoc_region_from_inode(file_inode(inode));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

static const struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	/* Copy only the permission; np->list stays kernel-private. */
	if (copy_from_user(&np->permission, &arg->perm,
			   sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd. */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -ERANGE;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
	    (owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}
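
/*
 * Illustrative (untested) userspace sketch of the fd-scoped permission
 * lifecycle, assuming the uapi definitions in uapi/vsoc_shm.h. A manager
 * region grants a managed region's fd access to a page-aligned window by
 * claiming an owner word; the grant is torn down automatically when the
 * managed fd is released:
 *
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = begin,	// page aligned
 *			.end_offset = end,	// page aligned, > begin
 *			.owner_offset = owner,	// 4-byte aligned
 *			.owned_value = my_tag,	// != VSOC_REGION_FREE
 *		},
 *		.managed_region_fd = managed_fd,
 *	};
 *	ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg);
 */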

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	struct timespec ts;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		ts.tv_sec = arg->wake_time_sec;
		ts.tv_nsec = arg->wake_time_nsec;

		if (!timespec_valid(&ts))
			return -EINVAL;
		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts),
					     current->timer_slack_ns);

		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
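
/*
 * Illustrative (untested) userspace sketch of the futex-like protocol
 * above, assuming the uapi definitions in uapi/vsoc_shm.h. The waiter
 * sleeps while a 4-byte word in the region still holds the expected
 * value; the waker stores a new value and then wakes all sleepers:
 *
 *	struct vsoc_cond_wait w = {
 *		.offset = word_off,	// region-relative, 4-byte aligned
 *		.value = expected,	// sleep while *word == expected
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *	ioctl(region_fd, VSOC_COND_WAIT, &w);
 *
 *	// in the waking thread/process, after updating the word:
 *	ioctl(region_fd, VSOC_COND_WAKE, word_off);
 */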

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
			    (region_p,
			     node,
			     (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
			    ((struct vsoc_private_data *)filp->private_data)->
			    fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;
	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
		    (reg_data->interrupt_wait_queue,
		     (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
		    (filp,
		     (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

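	/*
	 * The layout descriptor is expected at offset 0 of the shared
	 * window; the region table and the regions themselves are located
	 * via offsets stored in it, so everything from here on is driven
	 * by what the host placed in shared memory.
	 */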
	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
	    vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
	    ((void *)vsoc_dev.layout +
	     vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
	    kcalloc(vsoc_dev.layout->region_count,
		    sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	vsoc_dev.regions_data =
	    kcalloc(vsoc_dev.layout->region_count,
		    sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
		    &region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
		    &region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
		    (&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
		    shm_off_to_virtual_addr(region->region_begin_offset) +
		    h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
		    shm_off_to_virtual_addr(region->region_begin_offset) +
		    g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		/* device_create() returns an ERR_PTR on failure, not NULL. */
		if (IS_ERR(device_create(vsoc_dev.class, NULL,
					 MKDEV(vsoc_dev.major, i),
					 NULL,
					 vsoc_dev.regions_data[i].name))) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			/*
			 * free_irq() must be passed the same dev_id that was
			 * handed to request_irq().
			 */
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
	    kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}
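
/*
 * Illustrative (untested) userspace sketch: regions show up as character
 * devices named after the region (created in vsoc_probe_device() above),
 * and mmap offsets are relative to the window computed by vsoc_get_area()
 * below:
 *
 *	int fd = open("/dev/hwcomposer", O_RDWR);	// hypothetical name
 *	void *base = mmap(NULL, length, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */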

/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission_node *node;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	node = ((struct vsoc_private_data *)filp->private_data)->
	    fd_scoped_permission_node;
	if (node) {
		off += node->permission.begin_offset;
		length = node->permission.end_offset -
		    node->permission.begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	/* area_len can be negative here, so don't let it compare unsigned */
	if (area_len < (ssize_t)len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
MODULE_VERSION("1.0");