Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.0-rc3 1161 lines 30 kB view raw
1/* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * Copyright(c) 2016 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * The full GNU General Public License is included in this distribution in 16 * the file called "COPYING". 17 * 18 * Intel Virtio Over PCIe (VOP) driver. 19 * 20 */ 21#include <linux/sched.h> 22#include <linux/poll.h> 23#include <linux/dma-mapping.h> 24 25#include <linux/mic_common.h> 26#include "../common/mic_dev.h" 27 28#include <linux/mic_ioctl.h> 29#include "vop_main.h" 30 31/* Helper API to obtain the VOP PCIe device */ 32static inline struct device *vop_dev(struct vop_vdev *vdev) 33{ 34 return vdev->vpdev->dev.parent; 35} 36 37/* Helper API to check if a virtio device is initialized */ 38static inline int vop_vdev_inited(struct vop_vdev *vdev) 39{ 40 if (!vdev) 41 return -EINVAL; 42 /* Device has not been created yet */ 43 if (!vdev->dd || !vdev->dd->type) { 44 dev_err(vop_dev(vdev), "%s %d err %d\n", 45 __func__, __LINE__, -EINVAL); 46 return -EINVAL; 47 } 48 /* Device has been removed/deleted */ 49 if (vdev->dd->type == -1) { 50 dev_dbg(vop_dev(vdev), "%s %d err %d\n", 51 __func__, __LINE__, -ENODEV); 52 return -ENODEV; 53 } 54 return 0; 55} 56 57static void _vop_notify(struct vringh *vrh) 58{ 59 struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh); 60 struct vop_vdev *vdev = vvrh->vdev; 61 struct vop_device *vpdev = vdev->vpdev; 62 s8 db = vdev->dc->h2c_vdev_db; 63 64 if (db != -1) 65 vpdev->hw_ops->send_intr(vpdev, db); 66} 67 68static void vop_virtio_init_post(struct vop_vdev *vdev) 69{ 70 struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd); 71 struct vop_device *vpdev = vdev->vpdev; 72 int i, used_size; 73 74 for (i = 0; i < vdev->dd->num_vq; i++) { 75 used_size = PAGE_ALIGN(sizeof(u16) * 3 + 76 sizeof(struct vring_used_elem) * 77 le16_to_cpu(vqconfig->num)); 78 if (!le64_to_cpu(vqconfig[i].used_address)) { 79 dev_warn(vop_dev(vdev), "used_address zero??\n"); 80 continue; 81 } 82 vdev->vvr[i].vrh.vring.used = 83 (void __force *)vpdev->hw_ops->ioremap( 84 vpdev, 85 le64_to_cpu(vqconfig[i].used_address), 86 used_size); 87 } 88 89 vdev->dc->used_address_updated = 0; 90 91 dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n", 92 __func__, vdev->virtio_id); 93} 94 95static inline void vop_virtio_device_reset(struct vop_vdev *vdev) 96{ 97 int i; 98 99 dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n", 100 __func__, vdev->dd->status, vdev->virtio_id); 101 102 for (i = 0; i < vdev->dd->num_vq; i++) 103 /* 104 * Avoid lockdep false positive. The + 1 is for the vop 105 * mutex which is held in the reset devices code path. 106 */ 107 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); 108 109 /* 0 status means "reset" */ 110 vdev->dd->status = 0; 111 vdev->dc->vdev_reset = 0; 112 vdev->dc->host_ack = 1; 113 114 for (i = 0; i < vdev->dd->num_vq; i++) { 115 struct vringh *vrh = &vdev->vvr[i].vrh; 116 117 vdev->vvr[i].vring.info->avail_idx = 0; 118 vrh->completed = 0; 119 vrh->last_avail_idx = 0; 120 vrh->last_used_idx = 0; 121 } 122 123 for (i = 0; i < vdev->dd->num_vq; i++) 124 mutex_unlock(&vdev->vvr[i].vr_mutex); 125} 126 127static void vop_virtio_reset_devices(struct vop_info *vi) 128{ 129 struct list_head *pos, *tmp; 130 struct vop_vdev *vdev; 131 132 list_for_each_safe(pos, tmp, &vi->vdev_list) { 133 vdev = list_entry(pos, struct vop_vdev, list); 134 vop_virtio_device_reset(vdev); 135 vdev->poll_wake = 1; 136 wake_up(&vdev->waitq); 137 } 138} 139 140static void vop_bh_handler(struct work_struct *work) 141{ 142 struct vop_vdev *vdev = container_of(work, struct vop_vdev, 143 virtio_bh_work); 144 145 if (vdev->dc->used_address_updated) 146 vop_virtio_init_post(vdev); 147 148 if (vdev->dc->vdev_reset) 149 vop_virtio_device_reset(vdev); 150 151 vdev->poll_wake = 1; 152 wake_up(&vdev->waitq); 153} 154 155static irqreturn_t _vop_virtio_intr_handler(int irq, void *data) 156{ 157 struct vop_vdev *vdev = data; 158 struct vop_device *vpdev = vdev->vpdev; 159 160 vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db); 161 schedule_work(&vdev->virtio_bh_work); 162 return IRQ_HANDLED; 163} 164 165static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp) 166{ 167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 168 int ret = 0, retry, i; 169 struct vop_device *vpdev = vdev->vpdev; 170 struct vop_info *vi = dev_get_drvdata(&vpdev->dev); 171 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 172 s8 db = bootparam->h2c_config_db; 173 174 mutex_lock(&vi->vop_mutex); 175 for (i = 0; i < vdev->dd->num_vq; i++) 176 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); 177 178 if (db == -1 || vdev->dd->type == -1) { 179 ret = -EIO; 180 goto exit; 181 } 182 183 memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len); 184 vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; 185 vpdev->hw_ops->send_intr(vpdev, db); 186 187 for (retry = 100; retry--;) { 188 ret = wait_event_timeout(wake, vdev->dc->guest_ack, 189 msecs_to_jiffies(100)); 190 if (ret) 191 break; 192 } 193 194 dev_dbg(vop_dev(vdev), 195 "%s %d retry: %d\n", __func__, __LINE__, retry); 196 vdev->dc->config_change = 0; 197 vdev->dc->guest_ack = 0; 198exit: 199 for (i = 0; i < vdev->dd->num_vq; i++) 200 mutex_unlock(&vdev->vvr[i].vr_mutex); 201 mutex_unlock(&vi->vop_mutex); 202 return ret; 203} 204 205static int vop_copy_dp_entry(struct vop_vdev *vdev, 206 struct mic_device_desc *argp, __u8 *type, 207 struct mic_device_desc **devpage) 208{ 209 struct vop_device *vpdev = vdev->vpdev; 210 struct mic_device_desc *devp; 211 struct mic_vqconfig *vqconfig; 212 int ret = 0, i; 213 bool slot_found = false; 214 215 vqconfig = mic_vq_config(argp); 216 for (i = 0; i < argp->num_vq; i++) { 217 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { 218 ret = -EINVAL; 219 dev_err(vop_dev(vdev), "%s %d err %d\n", 220 __func__, __LINE__, ret); 221 goto exit; 222 } 223 } 224 225 /* Find the first free device page entry */ 226 for (i = sizeof(struct mic_bootparam); 227 i < MIC_DP_SIZE - mic_total_desc_size(argp); 228 i += mic_total_desc_size(devp)) { 229 devp = vpdev->hw_ops->get_dp(vpdev) + i; 230 if (devp->type == 0 || devp->type == -1) { 231 slot_found = true; 232 break; 233 } 234 } 235 if (!slot_found) { 236 ret = -EINVAL; 237 dev_err(vop_dev(vdev), "%s %d err %d\n", 238 __func__, __LINE__, ret); 239 goto exit; 240 } 241 /* 242 * Save off the type before doing the memcpy. Type will be set in the 243 * end after completing all initialization for the new device. 244 */ 245 *type = argp->type; 246 argp->type = 0; 247 memcpy(devp, argp, mic_desc_size(argp)); 248 249 *devpage = devp; 250exit: 251 return ret; 252} 253 254static void vop_init_device_ctrl(struct vop_vdev *vdev, 255 struct mic_device_desc *devpage) 256{ 257 struct mic_device_ctrl *dc; 258 259 dc = (void *)devpage + mic_aligned_desc_size(devpage); 260 261 dc->config_change = 0; 262 dc->guest_ack = 0; 263 dc->vdev_reset = 0; 264 dc->host_ack = 0; 265 dc->used_address_updated = 0; 266 dc->c2h_vdev_db = -1; 267 dc->h2c_vdev_db = -1; 268 vdev->dc = dc; 269} 270 271static int vop_virtio_add_device(struct vop_vdev *vdev, 272 struct mic_device_desc *argp) 273{ 274 struct vop_info *vi = vdev->vi; 275 struct vop_device *vpdev = vi->vpdev; 276 struct mic_device_desc *dd = NULL; 277 struct mic_vqconfig *vqconfig; 278 int vr_size, i, j, ret; 279 u8 type = 0; 280 s8 db = -1; 281 char irqname[16]; 282 struct mic_bootparam *bootparam; 283 u16 num; 284 dma_addr_t vr_addr; 285 286 bootparam = vpdev->hw_ops->get_dp(vpdev); 287 init_waitqueue_head(&vdev->waitq); 288 INIT_LIST_HEAD(&vdev->list); 289 vdev->vpdev = vpdev; 290 291 ret = vop_copy_dp_entry(vdev, argp, &type, &dd); 292 if (ret) { 293 dev_err(vop_dev(vdev), "%s %d err %d\n", 294 __func__, __LINE__, ret); 295 return ret; 296 } 297 298 vop_init_device_ctrl(vdev, dd); 299 300 vdev->dd = dd; 301 vdev->virtio_id = type; 302 vqconfig = mic_vq_config(dd); 303 INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler); 304 305 for (i = 0; i < dd->num_vq; i++) { 306 struct vop_vringh *vvr = &vdev->vvr[i]; 307 struct mic_vring *vr = &vdev->vvr[i].vring; 308 309 num = le16_to_cpu(vqconfig[i].num); 310 mutex_init(&vvr->vr_mutex); 311 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) + 312 sizeof(struct _mic_vring_info)); 313 vr->va = (void *) 314 __get_free_pages(GFP_KERNEL | __GFP_ZERO, 315 get_order(vr_size)); 316 if (!vr->va) { 317 ret = -ENOMEM; 318 dev_err(vop_dev(vdev), "%s %d err %d\n", 319 __func__, __LINE__, ret); 320 goto err; 321 } 322 vr->len = vr_size; 323 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN); 324 vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i); 325 vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size, 326 DMA_BIDIRECTIONAL); 327 if (dma_mapping_error(&vpdev->dev, vr_addr)) { 328 free_pages((unsigned long)vr->va, get_order(vr_size)); 329 ret = -ENOMEM; 330 dev_err(vop_dev(vdev), "%s %d err %d\n", 331 __func__, __LINE__, ret); 332 goto err; 333 } 334 vqconfig[i].address = cpu_to_le64(vr_addr); 335 336 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN); 337 ret = vringh_init_kern(&vvr->vrh, 338 *(u32 *)mic_vq_features(vdev->dd), 339 num, false, vr->vr.desc, vr->vr.avail, 340 vr->vr.used); 341 if (ret) { 342 dev_err(vop_dev(vdev), "%s %d err %d\n", 343 __func__, __LINE__, ret); 344 goto err; 345 } 346 vringh_kiov_init(&vvr->riov, NULL, 0); 347 vringh_kiov_init(&vvr->wiov, NULL, 0); 348 vvr->head = USHRT_MAX; 349 vvr->vdev = vdev; 350 vvr->vrh.notify = _vop_notify; 351 dev_dbg(&vpdev->dev, 352 "%s %d index %d va %p info %p vr_size 0x%x\n", 353 __func__, __LINE__, i, vr->va, vr->info, vr_size); 354 vvr->buf = (void *)__get_free_pages(GFP_KERNEL, 355 get_order(VOP_INT_DMA_BUF_SIZE)); 356 vvr->buf_da = dma_map_single(&vpdev->dev, 357 vvr->buf, VOP_INT_DMA_BUF_SIZE, 358 DMA_BIDIRECTIONAL); 359 } 360 361 snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index, 362 vdev->virtio_id); 363 vdev->virtio_db = vpdev->hw_ops->next_db(vpdev); 364 vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, 365 _vop_virtio_intr_handler, irqname, vdev, 366 vdev->virtio_db); 367 if (IS_ERR(vdev->virtio_cookie)) { 368 ret = PTR_ERR(vdev->virtio_cookie); 369 dev_dbg(&vpdev->dev, "request irq failed\n"); 370 goto err; 371 } 372 373 vdev->dc->c2h_vdev_db = vdev->virtio_db; 374 375 /* 376 * Order the type update with previous stores. This write barrier 377 * is paired with the corresponding read barrier before the uncached 378 * system memory read of the type, on the card while scanning the 379 * device page. 380 */ 381 smp_wmb(); 382 dd->type = type; 383 argp->type = type; 384 385 if (bootparam) { 386 db = bootparam->h2c_config_db; 387 if (db != -1) 388 vpdev->hw_ops->send_intr(vpdev, db); 389 } 390 dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db); 391 return 0; 392err: 393 vqconfig = mic_vq_config(dd); 394 for (j = 0; j < i; j++) { 395 struct vop_vringh *vvr = &vdev->vvr[j]; 396 397 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address), 398 vvr->vring.len, DMA_BIDIRECTIONAL); 399 free_pages((unsigned long)vvr->vring.va, 400 get_order(vvr->vring.len)); 401 } 402 return ret; 403} 404 405static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp, 406 struct vop_device *vpdev) 407{ 408 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 409 s8 db; 410 int ret, retry; 411 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 412 413 devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; 414 db = bootparam->h2c_config_db; 415 if (db != -1) 416 vpdev->hw_ops->send_intr(vpdev, db); 417 else 418 goto done; 419 for (retry = 15; retry--;) { 420 ret = wait_event_timeout(wake, devp->guest_ack, 421 msecs_to_jiffies(1000)); 422 if (ret) 423 break; 424 } 425done: 426 devp->config_change = 0; 427 devp->guest_ack = 0; 428} 429 430static void vop_virtio_del_device(struct vop_vdev *vdev) 431{ 432 struct vop_info *vi = vdev->vi; 433 struct vop_device *vpdev = vdev->vpdev; 434 int i; 435 struct mic_vqconfig *vqconfig; 436 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 437 438 if (!bootparam) 439 goto skip_hot_remove; 440 vop_dev_remove(vi, vdev->dc, vpdev); 441skip_hot_remove: 442 vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); 443 flush_work(&vdev->virtio_bh_work); 444 vqconfig = mic_vq_config(vdev->dd); 445 for (i = 0; i < vdev->dd->num_vq; i++) { 446 struct vop_vringh *vvr = &vdev->vvr[i]; 447 448 dma_unmap_single(&vpdev->dev, 449 vvr->buf_da, VOP_INT_DMA_BUF_SIZE, 450 DMA_BIDIRECTIONAL); 451 free_pages((unsigned long)vvr->buf, 452 get_order(VOP_INT_DMA_BUF_SIZE)); 453 vringh_kiov_cleanup(&vvr->riov); 454 vringh_kiov_cleanup(&vvr->wiov); 455 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address), 456 vvr->vring.len, DMA_BIDIRECTIONAL); 457 free_pages((unsigned long)vvr->vring.va, 458 get_order(vvr->vring.len)); 459 } 460 /* 461 * Order the type update with previous stores. This write barrier 462 * is paired with the corresponding read barrier before the uncached 463 * system memory read of the type, on the card while scanning the 464 * device page. 465 */ 466 smp_wmb(); 467 vdev->dd->type = -1; 468} 469 470/* 471 * vop_sync_dma - Wrapper for synchronous DMAs. 472 * 473 * @dev - The address of the pointer to the device instance used 474 * for DMA registration. 475 * @dst - destination DMA address. 476 * @src - source DMA address. 477 * @len - size of the transfer. 478 * 479 * Return DMA_SUCCESS on success 480 */ 481static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src, 482 size_t len) 483{ 484 int err = 0; 485 struct dma_device *ddev; 486 struct dma_async_tx_descriptor *tx; 487 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); 488 struct dma_chan *vop_ch = vi->dma_ch; 489 490 if (!vop_ch) { 491 err = -EBUSY; 492 goto error; 493 } 494 ddev = vop_ch->device; 495 tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len, 496 DMA_PREP_FENCE); 497 if (!tx) { 498 err = -ENOMEM; 499 goto error; 500 } else { 501 dma_cookie_t cookie; 502 503 cookie = tx->tx_submit(tx); 504 if (dma_submit_error(cookie)) { 505 err = -ENOMEM; 506 goto error; 507 } 508 dma_async_issue_pending(vop_ch); 509 err = dma_sync_wait(vop_ch, cookie); 510 } 511error: 512 if (err) 513 dev_err(&vi->vpdev->dev, "%s %d err %d\n", 514 __func__, __LINE__, err); 515 return err; 516} 517 518#define VOP_USE_DMA true 519 520/* 521 * Initiates the copies across the PCIe bus from card memory to a user 522 * space buffer. When transfers are done using DMA, source/destination 523 * addresses and transfer length must follow the alignment requirements of 524 * the MIC DMA engine. 525 */ 526static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf, 527 size_t len, u64 daddr, size_t dlen, 528 int vr_idx) 529{ 530 struct vop_device *vpdev = vdev->vpdev; 531 void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); 532 struct vop_vringh *vvr = &vdev->vvr[vr_idx]; 533 struct vop_info *vi = dev_get_drvdata(&vpdev->dev); 534 size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; 535 bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); 536 size_t dma_offset, partlen; 537 int err; 538 539 if (!VOP_USE_DMA) { 540 if (copy_to_user(ubuf, (void __force *)dbuf, len)) { 541 err = -EFAULT; 542 dev_err(vop_dev(vdev), "%s %d err %d\n", 543 __func__, __LINE__, err); 544 goto err; 545 } 546 vdev->in_bytes += len; 547 err = 0; 548 goto err; 549 } 550 551 dma_offset = daddr - round_down(daddr, dma_alignment); 552 daddr -= dma_offset; 553 len += dma_offset; 554 /* 555 * X100 uses DMA addresses as seen by the card so adding 556 * the aperture base is not required for DMA. However x200 557 * requires DMA addresses to be an offset into the bar so 558 * add the aperture base for x200. 559 */ 560 if (x200) 561 daddr += vpdev->aper->pa; 562 while (len) { 563 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); 564 err = vop_sync_dma(vdev, vvr->buf_da, daddr, 565 ALIGN(partlen, dma_alignment)); 566 if (err) { 567 dev_err(vop_dev(vdev), "%s %d err %d\n", 568 __func__, __LINE__, err); 569 goto err; 570 } 571 if (copy_to_user(ubuf, vvr->buf + dma_offset, 572 partlen - dma_offset)) { 573 err = -EFAULT; 574 dev_err(vop_dev(vdev), "%s %d err %d\n", 575 __func__, __LINE__, err); 576 goto err; 577 } 578 daddr += partlen; 579 ubuf += partlen; 580 dbuf += partlen; 581 vdev->in_bytes_dma += partlen; 582 vdev->in_bytes += partlen; 583 len -= partlen; 584 dma_offset = 0; 585 } 586 err = 0; 587err: 588 vpdev->hw_ops->iounmap(vpdev, dbuf); 589 dev_dbg(vop_dev(vdev), 590 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", 591 __func__, ubuf, dbuf, len, vr_idx); 592 return err; 593} 594 595/* 596 * Initiates copies across the PCIe bus from a user space buffer to card 597 * memory. When transfers are done using DMA, source/destination addresses 598 * and transfer length must follow the alignment requirements of the MIC 599 * DMA engine. 600 */ 601static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf, 602 size_t len, u64 daddr, size_t dlen, 603 int vr_idx) 604{ 605 struct vop_device *vpdev = vdev->vpdev; 606 void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); 607 struct vop_vringh *vvr = &vdev->vvr[vr_idx]; 608 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); 609 size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; 610 bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); 611 size_t partlen; 612 bool dma = VOP_USE_DMA; 613 int err = 0; 614 615 if (daddr & (dma_alignment - 1)) { 616 vdev->tx_dst_unaligned += len; 617 dma = false; 618 } else if (ALIGN(len, dma_alignment) > dlen) { 619 vdev->tx_len_unaligned += len; 620 dma = false; 621 } 622 623 if (!dma) 624 goto memcpy; 625 626 /* 627 * X100 uses DMA addresses as seen by the card so adding 628 * the aperture base is not required for DMA. However x200 629 * requires DMA addresses to be an offset into the bar so 630 * add the aperture base for x200. 631 */ 632 if (x200) 633 daddr += vpdev->aper->pa; 634 while (len) { 635 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); 636 637 if (copy_from_user(vvr->buf, ubuf, partlen)) { 638 err = -EFAULT; 639 dev_err(vop_dev(vdev), "%s %d err %d\n", 640 __func__, __LINE__, err); 641 goto err; 642 } 643 err = vop_sync_dma(vdev, daddr, vvr->buf_da, 644 ALIGN(partlen, dma_alignment)); 645 if (err) { 646 dev_err(vop_dev(vdev), "%s %d err %d\n", 647 __func__, __LINE__, err); 648 goto err; 649 } 650 daddr += partlen; 651 ubuf += partlen; 652 dbuf += partlen; 653 vdev->out_bytes_dma += partlen; 654 vdev->out_bytes += partlen; 655 len -= partlen; 656 } 657memcpy: 658 /* 659 * We are copying to IO below and should ideally use something 660 * like copy_from_user_toio(..) if it existed. 661 */ 662 if (copy_from_user((void __force *)dbuf, ubuf, len)) { 663 err = -EFAULT; 664 dev_err(vop_dev(vdev), "%s %d err %d\n", 665 __func__, __LINE__, err); 666 goto err; 667 } 668 vdev->out_bytes += len; 669 err = 0; 670err: 671 vpdev->hw_ops->iounmap(vpdev, dbuf); 672 dev_dbg(vop_dev(vdev), 673 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", 674 __func__, ubuf, dbuf, len, vr_idx); 675 return err; 676} 677 678#define MIC_VRINGH_READ true 679 680/* Determine the total number of bytes consumed in a VRINGH KIOV */ 681static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov) 682{ 683 int i; 684 u32 total = iov->consumed; 685 686 for (i = 0; i < iov->i; i++) 687 total += iov->iov[i].iov_len; 688 return total; 689} 690 691/* 692 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. 693 * This API is heavily based on the vringh_iov_xfer(..) implementation 694 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) 695 * and vringh_iov_push_kern(..) directly is because there is no 696 * way to override the VRINGH xfer(..) routines as of v3.10. 697 */ 698static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov, 699 void __user *ubuf, size_t len, bool read, int vr_idx, 700 size_t *out_len) 701{ 702 int ret = 0; 703 size_t partlen, tot_len = 0; 704 705 while (len && iov->i < iov->used) { 706 struct kvec *kiov = &iov->iov[iov->i]; 707 708 partlen = min(kiov->iov_len, len); 709 if (read) 710 ret = vop_virtio_copy_to_user(vdev, ubuf, partlen, 711 (u64)kiov->iov_base, 712 kiov->iov_len, 713 vr_idx); 714 else 715 ret = vop_virtio_copy_from_user(vdev, ubuf, partlen, 716 (u64)kiov->iov_base, 717 kiov->iov_len, 718 vr_idx); 719 if (ret) { 720 dev_err(vop_dev(vdev), "%s %d err %d\n", 721 __func__, __LINE__, ret); 722 break; 723 } 724 len -= partlen; 725 ubuf += partlen; 726 tot_len += partlen; 727 iov->consumed += partlen; 728 kiov->iov_len -= partlen; 729 kiov->iov_base += partlen; 730 if (!kiov->iov_len) { 731 /* Fix up old iov element then increment. */ 732 kiov->iov_len = iov->consumed; 733 kiov->iov_base -= iov->consumed; 734 735 iov->consumed = 0; 736 iov->i++; 737 } 738 } 739 *out_len = tot_len; 740 return ret; 741} 742 743/* 744 * Use the standard VRINGH infrastructure in the kernel to fetch new 745 * descriptors, initiate the copies and update the used ring. 746 */ 747static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy) 748{ 749 int ret = 0; 750 u32 iovcnt = copy->iovcnt; 751 struct iovec iov; 752 struct iovec __user *u_iov = copy->iov; 753 void __user *ubuf = NULL; 754 struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx]; 755 struct vringh_kiov *riov = &vvr->riov; 756 struct vringh_kiov *wiov = &vvr->wiov; 757 struct vringh *vrh = &vvr->vrh; 758 u16 *head = &vvr->head; 759 struct mic_vring *vr = &vvr->vring; 760 size_t len = 0, out_len; 761 762 copy->out_len = 0; 763 /* Fetch a new IOVEC if all previous elements have been processed */ 764 if (riov->i == riov->used && wiov->i == wiov->used) { 765 ret = vringh_getdesc_kern(vrh, riov, wiov, 766 head, GFP_KERNEL); 767 /* Check if there are available descriptors */ 768 if (ret <= 0) 769 return ret; 770 } 771 while (iovcnt) { 772 if (!len) { 773 /* Copy over a new iovec from user space. */ 774 ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); 775 if (ret) { 776 ret = -EINVAL; 777 dev_err(vop_dev(vdev), "%s %d err %d\n", 778 __func__, __LINE__, ret); 779 break; 780 } 781 len = iov.iov_len; 782 ubuf = iov.iov_base; 783 } 784 /* Issue all the read descriptors first */ 785 ret = vop_vringh_copy(vdev, riov, ubuf, len, 786 MIC_VRINGH_READ, copy->vr_idx, &out_len); 787 if (ret) { 788 dev_err(vop_dev(vdev), "%s %d err %d\n", 789 __func__, __LINE__, ret); 790 break; 791 } 792 len -= out_len; 793 ubuf += out_len; 794 copy->out_len += out_len; 795 /* Issue the write descriptors next */ 796 ret = vop_vringh_copy(vdev, wiov, ubuf, len, 797 !MIC_VRINGH_READ, copy->vr_idx, &out_len); 798 if (ret) { 799 dev_err(vop_dev(vdev), "%s %d err %d\n", 800 __func__, __LINE__, ret); 801 break; 802 } 803 len -= out_len; 804 ubuf += out_len; 805 copy->out_len += out_len; 806 if (!len) { 807 /* One user space iovec is now completed */ 808 iovcnt--; 809 u_iov++; 810 } 811 /* Exit loop if all elements in KIOVs have been processed. */ 812 if (riov->i == riov->used && wiov->i == wiov->used) 813 break; 814 } 815 /* 816 * Update the used ring if a descriptor was available and some data was 817 * copied in/out and the user asked for a used ring update. 818 */ 819 if (*head != USHRT_MAX && copy->out_len && copy->update_used) { 820 u32 total = 0; 821 822 /* Determine the total data consumed */ 823 total += vop_vringh_iov_consumed(riov); 824 total += vop_vringh_iov_consumed(wiov); 825 vringh_complete_kern(vrh, *head, total); 826 *head = USHRT_MAX; 827 if (vringh_need_notify_kern(vrh) > 0) 828 vringh_notify(vrh); 829 vringh_kiov_cleanup(riov); 830 vringh_kiov_cleanup(wiov); 831 /* Update avail idx for user space */ 832 vr->info->avail_idx = vrh->last_avail_idx; 833 } 834 return ret; 835} 836 837static inline int vop_verify_copy_args(struct vop_vdev *vdev, 838 struct mic_copy_desc *copy) 839{ 840 if (!vdev || copy->vr_idx >= vdev->dd->num_vq) 841 return -EINVAL; 842 return 0; 843} 844 845/* Copy a specified number of virtio descriptors in a chain */ 846static int vop_virtio_copy_desc(struct vop_vdev *vdev, 847 struct mic_copy_desc *copy) 848{ 849 int err; 850 struct vop_vringh *vvr; 851 852 err = vop_verify_copy_args(vdev, copy); 853 if (err) 854 return err; 855 856 vvr = &vdev->vvr[copy->vr_idx]; 857 mutex_lock(&vvr->vr_mutex); 858 if (!vop_vdevup(vdev)) { 859 err = -ENODEV; 860 dev_err(vop_dev(vdev), "%s %d err %d\n", 861 __func__, __LINE__, err); 862 goto err; 863 } 864 err = _vop_virtio_copy(vdev, copy); 865 if (err) { 866 dev_err(vop_dev(vdev), "%s %d err %d\n", 867 __func__, __LINE__, err); 868 } 869err: 870 mutex_unlock(&vvr->vr_mutex); 871 return err; 872} 873 874static int vop_open(struct inode *inode, struct file *f) 875{ 876 struct vop_vdev *vdev; 877 struct vop_info *vi = container_of(f->private_data, 878 struct vop_info, miscdev); 879 880 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); 881 if (!vdev) 882 return -ENOMEM; 883 vdev->vi = vi; 884 mutex_init(&vdev->vdev_mutex); 885 f->private_data = vdev; 886 init_completion(&vdev->destroy); 887 complete(&vdev->destroy); 888 return 0; 889} 890 891static int vop_release(struct inode *inode, struct file *f) 892{ 893 struct vop_vdev *vdev = f->private_data, *vdev_tmp; 894 struct vop_info *vi = vdev->vi; 895 struct list_head *pos, *tmp; 896 bool found = false; 897 898 mutex_lock(&vdev->vdev_mutex); 899 if (vdev->deleted) 900 goto unlock; 901 mutex_lock(&vi->vop_mutex); 902 list_for_each_safe(pos, tmp, &vi->vdev_list) { 903 vdev_tmp = list_entry(pos, struct vop_vdev, list); 904 if (vdev == vdev_tmp) { 905 vop_virtio_del_device(vdev); 906 list_del(pos); 907 found = true; 908 break; 909 } 910 } 911 mutex_unlock(&vi->vop_mutex); 912unlock: 913 mutex_unlock(&vdev->vdev_mutex); 914 if (!found) 915 wait_for_completion(&vdev->destroy); 916 f->private_data = NULL; 917 kfree(vdev); 918 return 0; 919} 920 921static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 922{ 923 struct vop_vdev *vdev = f->private_data; 924 struct vop_info *vi = vdev->vi; 925 void __user *argp = (void __user *)arg; 926 int ret; 927 928 switch (cmd) { 929 case MIC_VIRTIO_ADD_DEVICE: 930 { 931 struct mic_device_desc dd, *dd_config; 932 933 if (copy_from_user(&dd, argp, sizeof(dd))) 934 return -EFAULT; 935 936 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE || 937 dd.num_vq > MIC_MAX_VRINGS) 938 return -EINVAL; 939 940 dd_config = memdup_user(argp, mic_desc_size(&dd)); 941 if (IS_ERR(dd_config)) 942 return PTR_ERR(dd_config); 943 944 /* Ensure desc has not changed between the two reads */ 945 if (memcmp(&dd, dd_config, sizeof(dd))) { 946 ret = -EINVAL; 947 goto free_ret; 948 } 949 mutex_lock(&vdev->vdev_mutex); 950 mutex_lock(&vi->vop_mutex); 951 ret = vop_virtio_add_device(vdev, dd_config); 952 if (ret) 953 goto unlock_ret; 954 list_add_tail(&vdev->list, &vi->vdev_list); 955unlock_ret: 956 mutex_unlock(&vi->vop_mutex); 957 mutex_unlock(&vdev->vdev_mutex); 958free_ret: 959 kfree(dd_config); 960 return ret; 961 } 962 case MIC_VIRTIO_COPY_DESC: 963 { 964 struct mic_copy_desc copy; 965 966 mutex_lock(&vdev->vdev_mutex); 967 ret = vop_vdev_inited(vdev); 968 if (ret) 969 goto _unlock_ret; 970 971 if (copy_from_user(&copy, argp, sizeof(copy))) { 972 ret = -EFAULT; 973 goto _unlock_ret; 974 } 975 976 ret = vop_virtio_copy_desc(vdev, &copy); 977 if (ret < 0) 978 goto _unlock_ret; 979 if (copy_to_user( 980 &((struct mic_copy_desc __user *)argp)->out_len, 981 &copy.out_len, sizeof(copy.out_len))) 982 ret = -EFAULT; 983_unlock_ret: 984 mutex_unlock(&vdev->vdev_mutex); 985 return ret; 986 } 987 case MIC_VIRTIO_CONFIG_CHANGE: 988 { 989 void *buf; 990 991 mutex_lock(&vdev->vdev_mutex); 992 ret = vop_vdev_inited(vdev); 993 if (ret) 994 goto __unlock_ret; 995 buf = memdup_user(argp, vdev->dd->config_len); 996 if (IS_ERR(buf)) { 997 ret = PTR_ERR(buf); 998 goto __unlock_ret; 999 } 1000 ret = vop_virtio_config_change(vdev, buf); 1001 kfree(buf); 1002__unlock_ret: 1003 mutex_unlock(&vdev->vdev_mutex); 1004 return ret; 1005 } 1006 default: 1007 return -ENOIOCTLCMD; 1008 }; 1009 return 0; 1010} 1011 1012/* 1013 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and 1014 * not when previously enqueued buffers may be available. This means that 1015 * in the card->host (TX) path, when userspace is unblocked by poll it 1016 * must drain all available descriptors or it can stall. 1017 */ 1018static __poll_t vop_poll(struct file *f, poll_table *wait) 1019{ 1020 struct vop_vdev *vdev = f->private_data; 1021 __poll_t mask = 0; 1022 1023 mutex_lock(&vdev->vdev_mutex); 1024 if (vop_vdev_inited(vdev)) { 1025 mask = EPOLLERR; 1026 goto done; 1027 } 1028 poll_wait(f, &vdev->waitq, wait); 1029 if (vop_vdev_inited(vdev)) { 1030 mask = EPOLLERR; 1031 } else if (vdev->poll_wake) { 1032 vdev->poll_wake = 0; 1033 mask = EPOLLIN | EPOLLOUT; 1034 } 1035done: 1036 mutex_unlock(&vdev->vdev_mutex); 1037 return mask; 1038} 1039 1040static inline int 1041vop_query_offset(struct vop_vdev *vdev, unsigned long offset, 1042 unsigned long *size, unsigned long *pa) 1043{ 1044 struct vop_device *vpdev = vdev->vpdev; 1045 unsigned long start = MIC_DP_SIZE; 1046 int i; 1047 1048 /* 1049 * MMAP interface is as follows: 1050 * offset region 1051 * 0x0 virtio device_page 1052 * 0x1000 first vring 1053 * 0x1000 + size of 1st vring second vring 1054 * .... 1055 */ 1056 if (!offset) { 1057 *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev)); 1058 *size = MIC_DP_SIZE; 1059 return 0; 1060 } 1061 1062 for (i = 0; i < vdev->dd->num_vq; i++) { 1063 struct vop_vringh *vvr = &vdev->vvr[i]; 1064 1065 if (offset == start) { 1066 *pa = virt_to_phys(vvr->vring.va); 1067 *size = vvr->vring.len; 1068 return 0; 1069 } 1070 start += vvr->vring.len; 1071 } 1072 return -1; 1073} 1074 1075/* 1076 * Maps the device page and virtio rings to user space for readonly access. 1077 */ 1078static int vop_mmap(struct file *f, struct vm_area_struct *vma) 1079{ 1080 struct vop_vdev *vdev = f->private_data; 1081 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 1082 unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; 1083 int i, err; 1084 1085 err = vop_vdev_inited(vdev); 1086 if (err) 1087 goto ret; 1088 if (vma->vm_flags & VM_WRITE) { 1089 err = -EACCES; 1090 goto ret; 1091 } 1092 while (size_rem) { 1093 i = vop_query_offset(vdev, offset, &size, &pa); 1094 if (i < 0) { 1095 err = -EINVAL; 1096 goto ret; 1097 } 1098 err = remap_pfn_range(vma, vma->vm_start + offset, 1099 pa >> PAGE_SHIFT, size, 1100 vma->vm_page_prot); 1101 if (err) 1102 goto ret; 1103 size_rem -= size; 1104 offset += size; 1105 } 1106ret: 1107 return err; 1108} 1109 1110static const struct file_operations vop_fops = { 1111 .open = vop_open, 1112 .release = vop_release, 1113 .unlocked_ioctl = vop_ioctl, 1114 .poll = vop_poll, 1115 .mmap = vop_mmap, 1116 .owner = THIS_MODULE, 1117}; 1118 1119int vop_host_init(struct vop_info *vi) 1120{ 1121 int rc; 1122 struct miscdevice *mdev; 1123 struct vop_device *vpdev = vi->vpdev; 1124 1125 INIT_LIST_HEAD(&vi->vdev_list); 1126 vi->dma_ch = vpdev->dma_ch; 1127 mdev = &vi->miscdev; 1128 mdev->minor = MISC_DYNAMIC_MINOR; 1129 snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index); 1130 mdev->name = vi->name; 1131 mdev->fops = &vop_fops; 1132 mdev->parent = &vpdev->dev; 1133 1134 rc = misc_register(mdev); 1135 if (rc) 1136 dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc); 1137 return rc; 1138} 1139 1140void vop_host_uninit(struct vop_info *vi) 1141{ 1142 struct list_head *pos, *tmp; 1143 struct vop_vdev *vdev; 1144 1145 mutex_lock(&vi->vop_mutex); 1146 vop_virtio_reset_devices(vi); 1147 list_for_each_safe(pos, tmp, &vi->vdev_list) { 1148 vdev = list_entry(pos, struct vop_vdev, list); 1149 list_del(pos); 1150 reinit_completion(&vdev->destroy); 1151 mutex_unlock(&vi->vop_mutex); 1152 mutex_lock(&vdev->vdev_mutex); 1153 vop_virtio_del_device(vdev); 1154 vdev->deleted = true; 1155 mutex_unlock(&vdev->vdev_mutex); 1156 complete(&vdev->destroy); 1157 mutex_lock(&vi->vop_mutex); 1158 } 1159 mutex_unlock(&vi->vop_mutex); 1160 misc_deregister(&vi->miscdev); 1161}