Linux kernel mirror (for testing), git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git: drivers/misc/mic/vop/vop_vringh.c at v5.8 (1158 lines, 30 kB)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}

static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}

static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int i, used_size;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
			sizeof(struct vring_used_elem) *
			le16_to_cpu(vqconfig->num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->remap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}
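/*
 * Editor's note on the handshake below (inferred from the field usage in
 * this file): the mic_device_ctrl block (vdev->dc) is shared with the
 * card. The card sets used_address_updated/vdev_reset and rings the host
 * doorbell; the interrupt handler defers to vop_bh_handler(), which
 * services those requests and acknowledges through host_ack. The h2c and
 * c2h doorbells carry interrupts in each direction.
 */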
static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}

static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
			virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}

static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}
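/*
 * Editor's note, device page layout as used by the scan below: a struct
 * mic_bootparam header at offset 0, followed by packed mic_device_desc
 * entries (each with its vring configs, feature bits, config space and a
 * trailing mic_device_ctrl block). A slot with type 0 is free and type -1
 * marks a removed device; vop_copy_dp_entry() takes the first such slot.
 */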
static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
	     i < MIC_DP_SIZE - mic_total_desc_size(argp);
	     i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}

static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}
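/*
 * Editor's note on vop_virtio_add_device() below: it builds a device in
 * five steps. Copy the descriptor into a free device-page slot, allocate
 * and DMA-map one vring plus one bounce buffer per virtqueue, wire up the
 * vringh notify callback, request a doorbell interrupt, and only then
 * publish the device type (ordered by smp_wmb()) so the card never sees a
 * half-initialized descriptor while scanning the device page.
 */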
static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					     vvr->buf, VOP_INT_DMA_BUF_SIZE,
					     DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}
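/*
 * Editor's note on the data path that follows: every virtqueue owns a
 * VOP_INT_DMA_BUF_SIZE bounce buffer (vvr->buf, DMA-mapped at
 * vvr->buf_da). Bulk transfers stage through it, card memory <-DMA->
 * bounce buffer <-copy_{to,from}_user-> user space, and fall back to
 * plain copy_{to,from}_user on the remapped aperture when no DMA channel
 * is available or the alignment constraints cannot be met.
 */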
/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - The VOP virtio device whose DMA channel is used for the
 * transfer.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return 0 (DMA_COMPLETE) on success, an error code otherwise.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}

#define VOP_USE_DMA true
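/*
 * Editor's note, a worked example of the alignment handling in the copy
 * helpers below: with a 64-byte DMA alignment and daddr = 0x1003,
 * dma_offset becomes 3, the DMA reads from the rounded-down address
 * 0x1000 into the bounce buffer, and the copy to user space starts at
 * buf + 3 for partlen - 3 bytes. Later iterations are already aligned,
 * so dma_offset is reset to 0 at the end of the loop.
 */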
/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA || !vi->dma_ch) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_alignment = 1 << vi->dma_ch->device->copy_align;
	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;
	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t partlen;
	bool dma = VOP_USE_DMA && vi->dma_ch;
	int err = 0;

	if (dma) {
		dma_alignment = 1 << vi->dma_ch->device->copy_align;
		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

		if (daddr & (dma_alignment - 1)) {
			vdev->tx_dst_unaligned += len;
			dma = false;
		} else if (ALIGN(len, dma_alignment) > dlen) {
			vdev->tx_len_unaligned += len;
			dma = false;
		}
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	vdev->out_bytes += len;
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}
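/*
 * Editor's note on the direction flag below: MIC_VRINGH_READ means the
 * host reads descriptor payloads out of card memory into the user space
 * buffer (the riov/in_bytes path via vop_virtio_copy_to_user());
 * !MIC_VRINGH_READ pushes user data into device-writable descriptors
 * (the wiov/out_bytes path via vop_virtio_copy_from_user()).
 */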
#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];
		unsigned long daddr = (unsigned long)kiov->iov_base;

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      daddr,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							daddr,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}
/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}
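/*
 * Editor's note on file lifecycle: each open() gets its own vop_vdev, so
 * one fd manages at most one virtio device. release() deletes that device
 * unless vop_host_uninit() got there first (vdev->deleted), in which case
 * it waits on the destroy completion before freeing the vop_vdev.
 */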
static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}
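/*
 * Editor's note, the ioctl ABI handled below (declared in
 * include/uapi/linux/mic_ioctl.h):
 * MIC_VIRTIO_ADD_DEVICE    - add a virtio device described by a
 *                            mic_device_desc to the device page.
 * MIC_VIRTIO_COPY_DESC     - copy data between user space iovecs and the
 *                            descriptors of one virtqueue.
 * MIC_VIRTIO_CONFIG_CHANGE - update the device config space and notify
 *                            the card.
 */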
static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = memdup_user(argp, mic_desc_size(&dd));
		if (IS_ERR(dd_config))
			return PTR_ERR(dd_config);

		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = memdup_user(argp, vdev->dd->config_len);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			goto __unlock_ret;
		}
		ret = vop_virtio_config_change(vdev, buf);
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	};
	return 0;
}

/*
 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static __poll_t vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	__poll_t mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = EPOLLIN | EPOLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}

static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset				region
	 * 0x0					virtio device_page
	 * 0x1000				first vring
	 * 0x1000 + size of 1st vring		second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}

static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.poll = vop_poll,
	.mmap = vop_mmap,
	.owner = THIS_MODULE,
};

int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}
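/*
 * Editor's note on the teardown below: vop_mutex is dropped before taking
 * each vdev_mutex because vop_release() acquires the two locks in the
 * opposite order, so holding both here could deadlock. The deleted flag
 * plus the destroy completion let a racing release() wait instead of
 * freeing a device that is mid-teardown.
 */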
void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}
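Usage sketch (not part of the file above): a minimal user space sequence against the misc device this driver registers, /dev/vop_virtio0 for the first VOP device per vop_host_init(). The ioctls and structures come from the UAPI headers linux/mic_common.h and linux/mic_ioctl.h. The zeroed descriptor passed to MIC_VIRTIO_ADD_DEVICE is a placeholder only; a real management daemon fills in a complete mic_device_desc with vring configs and config space before adding the device.

/* vop-usage-sketch.c - illustrative only, error handling trimmed */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/mic_common.h>
#include <linux/mic_ioctl.h>

int main(void)
{
	struct pollfd pfd;
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct mic_copy_desc copy = {
		.iov = &iov, .iovcnt = 1, .vr_idx = 0, .update_used = 1,
	};
	/* Placeholder; must describe a real virtio device to be useful. */
	struct mic_device_desc desc = { 0 };
	int fd = open("/dev/vop_virtio0", O_RDWR);

	if (fd < 0)
		return 1;

	if (ioctl(fd, MIC_VIRTIO_ADD_DEVICE, &desc) < 0)
		perror("MIC_VIRTIO_ADD_DEVICE");

	pfd.fd = fd;
	pfd.events = POLLIN | POLLOUT;
	/*
	 * Per the comment above vop_poll(): once woken, keep issuing
	 * MIC_VIRTIO_COPY_DESC until no more data moves, or the
	 * card->host path can stall.
	 */
	if (poll(&pfd, 1, -1) > 0 &&
	    ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy) == 0)
		printf("moved %u bytes\n", copy.out_len);

	close(fd);
	return 0;
}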