Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.1 1170 lines 30 kB view raw
1/* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * Copyright(c) 2016 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * The full GNU General Public License is included in this distribution in 16 * the file called "COPYING". 17 * 18 * Intel Virtio Over PCIe (VOP) driver. 19 * 20 */ 21#include <linux/sched.h> 22#include <linux/poll.h> 23#include <linux/dma-mapping.h> 24 25#include <linux/mic_common.h> 26#include "../common/mic_dev.h" 27 28#include <linux/mic_ioctl.h> 29#include "vop_main.h" 30 31/* Helper API to obtain the VOP PCIe device */ 32static inline struct device *vop_dev(struct vop_vdev *vdev) 33{ 34 return vdev->vpdev->dev.parent; 35} 36 37/* Helper API to check if a virtio device is initialized */ 38static inline int vop_vdev_inited(struct vop_vdev *vdev) 39{ 40 if (!vdev) 41 return -EINVAL; 42 /* Device has not been created yet */ 43 if (!vdev->dd || !vdev->dd->type) { 44 dev_err(vop_dev(vdev), "%s %d err %d\n", 45 __func__, __LINE__, -EINVAL); 46 return -EINVAL; 47 } 48 /* Device has been removed/deleted */ 49 if (vdev->dd->type == -1) { 50 dev_dbg(vop_dev(vdev), "%s %d err %d\n", 51 __func__, __LINE__, -ENODEV); 52 return -ENODEV; 53 } 54 return 0; 55} 56 57static void _vop_notify(struct vringh *vrh) 58{ 59 struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh); 60 struct vop_vdev *vdev = vvrh->vdev; 61 struct vop_device *vpdev = vdev->vpdev; 62 s8 db = vdev->dc->h2c_vdev_db; 63 64 if (db != -1) 65 vpdev->hw_ops->send_intr(vpdev, db); 66} 67 68static void vop_virtio_init_post(struct vop_vdev *vdev) 69{ 70 struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd); 71 struct vop_device *vpdev = vdev->vpdev; 72 int i, used_size; 73 74 for (i = 0; i < vdev->dd->num_vq; i++) { 75 used_size = PAGE_ALIGN(sizeof(u16) * 3 + 76 sizeof(struct vring_used_elem) * 77 le16_to_cpu(vqconfig->num)); 78 if (!le64_to_cpu(vqconfig[i].used_address)) { 79 dev_warn(vop_dev(vdev), "used_address zero??\n"); 80 continue; 81 } 82 vdev->vvr[i].vrh.vring.used = 83 (void __force *)vpdev->hw_ops->remap( 84 vpdev, 85 le64_to_cpu(vqconfig[i].used_address), 86 used_size); 87 } 88 89 vdev->dc->used_address_updated = 0; 90 91 dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n", 92 __func__, vdev->virtio_id); 93} 94 95static inline void vop_virtio_device_reset(struct vop_vdev *vdev) 96{ 97 int i; 98 99 dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n", 100 __func__, vdev->dd->status, vdev->virtio_id); 101 102 for (i = 0; i < vdev->dd->num_vq; i++) 103 /* 104 * Avoid lockdep false positive. The + 1 is for the vop 105 * mutex which is held in the reset devices code path. 106 */ 107 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); 108 109 /* 0 status means "reset" */ 110 vdev->dd->status = 0; 111 vdev->dc->vdev_reset = 0; 112 vdev->dc->host_ack = 1; 113 114 for (i = 0; i < vdev->dd->num_vq; i++) { 115 struct vringh *vrh = &vdev->vvr[i].vrh; 116 117 vdev->vvr[i].vring.info->avail_idx = 0; 118 vrh->completed = 0; 119 vrh->last_avail_idx = 0; 120 vrh->last_used_idx = 0; 121 } 122 123 for (i = 0; i < vdev->dd->num_vq; i++) 124 mutex_unlock(&vdev->vvr[i].vr_mutex); 125} 126 127static void vop_virtio_reset_devices(struct vop_info *vi) 128{ 129 struct list_head *pos, *tmp; 130 struct vop_vdev *vdev; 131 132 list_for_each_safe(pos, tmp, &vi->vdev_list) { 133 vdev = list_entry(pos, struct vop_vdev, list); 134 vop_virtio_device_reset(vdev); 135 vdev->poll_wake = 1; 136 wake_up(&vdev->waitq); 137 } 138} 139 140static void vop_bh_handler(struct work_struct *work) 141{ 142 struct vop_vdev *vdev = container_of(work, struct vop_vdev, 143 virtio_bh_work); 144 145 if (vdev->dc->used_address_updated) 146 vop_virtio_init_post(vdev); 147 148 if (vdev->dc->vdev_reset) 149 vop_virtio_device_reset(vdev); 150 151 vdev->poll_wake = 1; 152 wake_up(&vdev->waitq); 153} 154 155static irqreturn_t _vop_virtio_intr_handler(int irq, void *data) 156{ 157 struct vop_vdev *vdev = data; 158 struct vop_device *vpdev = vdev->vpdev; 159 160 vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db); 161 schedule_work(&vdev->virtio_bh_work); 162 return IRQ_HANDLED; 163} 164 165static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp) 166{ 167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 168 int ret = 0, retry, i; 169 struct vop_device *vpdev = vdev->vpdev; 170 struct vop_info *vi = dev_get_drvdata(&vpdev->dev); 171 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 172 s8 db = bootparam->h2c_config_db; 173 174 mutex_lock(&vi->vop_mutex); 175 for (i = 0; i < vdev->dd->num_vq; i++) 176 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); 177 178 if (db == -1 || vdev->dd->type == -1) { 179 ret = -EIO; 180 goto exit; 181 } 182 183 memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len); 184 vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; 185 vpdev->hw_ops->send_intr(vpdev, db); 186 187 for (retry = 100; retry--;) { 188 ret = wait_event_timeout(wake, vdev->dc->guest_ack, 189 msecs_to_jiffies(100)); 190 if (ret) 191 break; 192 } 193 194 dev_dbg(vop_dev(vdev), 195 "%s %d retry: %d\n", __func__, __LINE__, retry); 196 vdev->dc->config_change = 0; 197 vdev->dc->guest_ack = 0; 198exit: 199 for (i = 0; i < vdev->dd->num_vq; i++) 200 mutex_unlock(&vdev->vvr[i].vr_mutex); 201 mutex_unlock(&vi->vop_mutex); 202 return ret; 203} 204 205static int vop_copy_dp_entry(struct vop_vdev *vdev, 206 struct mic_device_desc *argp, __u8 *type, 207 struct mic_device_desc **devpage) 208{ 209 struct vop_device *vpdev = vdev->vpdev; 210 struct mic_device_desc *devp; 211 struct mic_vqconfig *vqconfig; 212 int ret = 0, i; 213 bool slot_found = false; 214 215 vqconfig = mic_vq_config(argp); 216 for (i = 0; i < argp->num_vq; i++) { 217 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { 218 ret = -EINVAL; 219 dev_err(vop_dev(vdev), "%s %d err %d\n", 220 __func__, __LINE__, ret); 221 goto exit; 222 } 223 } 224 225 /* Find the first free device page entry */ 226 for (i = sizeof(struct mic_bootparam); 227 i < MIC_DP_SIZE - mic_total_desc_size(argp); 228 i += mic_total_desc_size(devp)) { 229 devp = vpdev->hw_ops->get_dp(vpdev) + i; 230 if (devp->type == 0 || devp->type == -1) { 231 slot_found = true; 232 break; 233 } 234 } 235 if (!slot_found) { 236 ret = -EINVAL; 237 dev_err(vop_dev(vdev), "%s %d err %d\n", 238 __func__, __LINE__, ret); 239 goto exit; 240 } 241 /* 242 * Save off the type before doing the memcpy. Type will be set in the 243 * end after completing all initialization for the new device. 244 */ 245 *type = argp->type; 246 argp->type = 0; 247 memcpy(devp, argp, mic_desc_size(argp)); 248 249 *devpage = devp; 250exit: 251 return ret; 252} 253 254static void vop_init_device_ctrl(struct vop_vdev *vdev, 255 struct mic_device_desc *devpage) 256{ 257 struct mic_device_ctrl *dc; 258 259 dc = (void *)devpage + mic_aligned_desc_size(devpage); 260 261 dc->config_change = 0; 262 dc->guest_ack = 0; 263 dc->vdev_reset = 0; 264 dc->host_ack = 0; 265 dc->used_address_updated = 0; 266 dc->c2h_vdev_db = -1; 267 dc->h2c_vdev_db = -1; 268 vdev->dc = dc; 269} 270 271static int vop_virtio_add_device(struct vop_vdev *vdev, 272 struct mic_device_desc *argp) 273{ 274 struct vop_info *vi = vdev->vi; 275 struct vop_device *vpdev = vi->vpdev; 276 struct mic_device_desc *dd = NULL; 277 struct mic_vqconfig *vqconfig; 278 int vr_size, i, j, ret; 279 u8 type = 0; 280 s8 db = -1; 281 char irqname[16]; 282 struct mic_bootparam *bootparam; 283 u16 num; 284 dma_addr_t vr_addr; 285 286 bootparam = vpdev->hw_ops->get_dp(vpdev); 287 init_waitqueue_head(&vdev->waitq); 288 INIT_LIST_HEAD(&vdev->list); 289 vdev->vpdev = vpdev; 290 291 ret = vop_copy_dp_entry(vdev, argp, &type, &dd); 292 if (ret) { 293 dev_err(vop_dev(vdev), "%s %d err %d\n", 294 __func__, __LINE__, ret); 295 return ret; 296 } 297 298 vop_init_device_ctrl(vdev, dd); 299 300 vdev->dd = dd; 301 vdev->virtio_id = type; 302 vqconfig = mic_vq_config(dd); 303 INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler); 304 305 for (i = 0; i < dd->num_vq; i++) { 306 struct vop_vringh *vvr = &vdev->vvr[i]; 307 struct mic_vring *vr = &vdev->vvr[i].vring; 308 309 num = le16_to_cpu(vqconfig[i].num); 310 mutex_init(&vvr->vr_mutex); 311 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) + 312 sizeof(struct _mic_vring_info)); 313 vr->va = (void *) 314 __get_free_pages(GFP_KERNEL | __GFP_ZERO, 315 get_order(vr_size)); 316 if (!vr->va) { 317 ret = -ENOMEM; 318 dev_err(vop_dev(vdev), "%s %d err %d\n", 319 __func__, __LINE__, ret); 320 goto err; 321 } 322 vr->len = vr_size; 323 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN); 324 vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i); 325 vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size, 326 DMA_BIDIRECTIONAL); 327 if (dma_mapping_error(&vpdev->dev, vr_addr)) { 328 free_pages((unsigned long)vr->va, get_order(vr_size)); 329 ret = -ENOMEM; 330 dev_err(vop_dev(vdev), "%s %d err %d\n", 331 __func__, __LINE__, ret); 332 goto err; 333 } 334 vqconfig[i].address = cpu_to_le64(vr_addr); 335 336 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN); 337 ret = vringh_init_kern(&vvr->vrh, 338 *(u32 *)mic_vq_features(vdev->dd), 339 num, false, vr->vr.desc, vr->vr.avail, 340 vr->vr.used); 341 if (ret) { 342 dev_err(vop_dev(vdev), "%s %d err %d\n", 343 __func__, __LINE__, ret); 344 goto err; 345 } 346 vringh_kiov_init(&vvr->riov, NULL, 0); 347 vringh_kiov_init(&vvr->wiov, NULL, 0); 348 vvr->head = USHRT_MAX; 349 vvr->vdev = vdev; 350 vvr->vrh.notify = _vop_notify; 351 dev_dbg(&vpdev->dev, 352 "%s %d index %d va %p info %p vr_size 0x%x\n", 353 __func__, __LINE__, i, vr->va, vr->info, vr_size); 354 vvr->buf = (void *)__get_free_pages(GFP_KERNEL, 355 get_order(VOP_INT_DMA_BUF_SIZE)); 356 vvr->buf_da = dma_map_single(&vpdev->dev, 357 vvr->buf, VOP_INT_DMA_BUF_SIZE, 358 DMA_BIDIRECTIONAL); 359 } 360 361 snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index, 362 vdev->virtio_id); 363 vdev->virtio_db = vpdev->hw_ops->next_db(vpdev); 364 vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, 365 _vop_virtio_intr_handler, irqname, vdev, 366 vdev->virtio_db); 367 if (IS_ERR(vdev->virtio_cookie)) { 368 ret = PTR_ERR(vdev->virtio_cookie); 369 dev_dbg(&vpdev->dev, "request irq failed\n"); 370 goto err; 371 } 372 373 vdev->dc->c2h_vdev_db = vdev->virtio_db; 374 375 /* 376 * Order the type update with previous stores. This write barrier 377 * is paired with the corresponding read barrier before the uncached 378 * system memory read of the type, on the card while scanning the 379 * device page. 380 */ 381 smp_wmb(); 382 dd->type = type; 383 argp->type = type; 384 385 if (bootparam) { 386 db = bootparam->h2c_config_db; 387 if (db != -1) 388 vpdev->hw_ops->send_intr(vpdev, db); 389 } 390 dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db); 391 return 0; 392err: 393 vqconfig = mic_vq_config(dd); 394 for (j = 0; j < i; j++) { 395 struct vop_vringh *vvr = &vdev->vvr[j]; 396 397 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address), 398 vvr->vring.len, DMA_BIDIRECTIONAL); 399 free_pages((unsigned long)vvr->vring.va, 400 get_order(vvr->vring.len)); 401 } 402 return ret; 403} 404 405static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp, 406 struct vop_device *vpdev) 407{ 408 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 409 s8 db; 410 int ret, retry; 411 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 412 413 devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; 414 db = bootparam->h2c_config_db; 415 if (db != -1) 416 vpdev->hw_ops->send_intr(vpdev, db); 417 else 418 goto done; 419 for (retry = 15; retry--;) { 420 ret = wait_event_timeout(wake, devp->guest_ack, 421 msecs_to_jiffies(1000)); 422 if (ret) 423 break; 424 } 425done: 426 devp->config_change = 0; 427 devp->guest_ack = 0; 428} 429 430static void vop_virtio_del_device(struct vop_vdev *vdev) 431{ 432 struct vop_info *vi = vdev->vi; 433 struct vop_device *vpdev = vdev->vpdev; 434 int i; 435 struct mic_vqconfig *vqconfig; 436 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); 437 438 if (!bootparam) 439 goto skip_hot_remove; 440 vop_dev_remove(vi, vdev->dc, vpdev); 441skip_hot_remove: 442 vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); 443 flush_work(&vdev->virtio_bh_work); 444 vqconfig = mic_vq_config(vdev->dd); 445 for (i = 0; i < vdev->dd->num_vq; i++) { 446 struct vop_vringh *vvr = &vdev->vvr[i]; 447 448 dma_unmap_single(&vpdev->dev, 449 vvr->buf_da, VOP_INT_DMA_BUF_SIZE, 450 DMA_BIDIRECTIONAL); 451 free_pages((unsigned long)vvr->buf, 452 get_order(VOP_INT_DMA_BUF_SIZE)); 453 vringh_kiov_cleanup(&vvr->riov); 454 vringh_kiov_cleanup(&vvr->wiov); 455 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address), 456 vvr->vring.len, DMA_BIDIRECTIONAL); 457 free_pages((unsigned long)vvr->vring.va, 458 get_order(vvr->vring.len)); 459 } 460 /* 461 * Order the type update with previous stores. This write barrier 462 * is paired with the corresponding read barrier before the uncached 463 * system memory read of the type, on the card while scanning the 464 * device page. 465 */ 466 smp_wmb(); 467 vdev->dd->type = -1; 468} 469 470/* 471 * vop_sync_dma - Wrapper for synchronous DMAs. 472 * 473 * @dev - The address of the pointer to the device instance used 474 * for DMA registration. 475 * @dst - destination DMA address. 476 * @src - source DMA address. 477 * @len - size of the transfer. 478 * 479 * Return DMA_SUCCESS on success 480 */ 481static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src, 482 size_t len) 483{ 484 int err = 0; 485 struct dma_device *ddev; 486 struct dma_async_tx_descriptor *tx; 487 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); 488 struct dma_chan *vop_ch = vi->dma_ch; 489 490 if (!vop_ch) { 491 err = -EBUSY; 492 goto error; 493 } 494 ddev = vop_ch->device; 495 tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len, 496 DMA_PREP_FENCE); 497 if (!tx) { 498 err = -ENOMEM; 499 goto error; 500 } else { 501 dma_cookie_t cookie; 502 503 cookie = tx->tx_submit(tx); 504 if (dma_submit_error(cookie)) { 505 err = -ENOMEM; 506 goto error; 507 } 508 dma_async_issue_pending(vop_ch); 509 err = dma_sync_wait(vop_ch, cookie); 510 } 511error: 512 if (err) 513 dev_err(&vi->vpdev->dev, "%s %d err %d\n", 514 __func__, __LINE__, err); 515 return err; 516} 517 518#define VOP_USE_DMA true 519 520/* 521 * Initiates the copies across the PCIe bus from card memory to a user 522 * space buffer. When transfers are done using DMA, source/destination 523 * addresses and transfer length must follow the alignment requirements of 524 * the MIC DMA engine. 525 */ 526static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf, 527 size_t len, u64 daddr, size_t dlen, 528 int vr_idx) 529{ 530 struct vop_device *vpdev = vdev->vpdev; 531 void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len); 532 struct vop_vringh *vvr = &vdev->vvr[vr_idx]; 533 struct vop_info *vi = dev_get_drvdata(&vpdev->dev); 534 size_t dma_alignment; 535 bool x200; 536 size_t dma_offset, partlen; 537 int err; 538 539 if (!VOP_USE_DMA || !vi->dma_ch) { 540 if (copy_to_user(ubuf, (void __force *)dbuf, len)) { 541 err = -EFAULT; 542 dev_err(vop_dev(vdev), "%s %d err %d\n", 543 __func__, __LINE__, err); 544 goto err; 545 } 546 vdev->in_bytes += len; 547 err = 0; 548 goto err; 549 } 550 551 dma_alignment = 1 << vi->dma_ch->device->copy_align; 552 x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); 553 554 dma_offset = daddr - round_down(daddr, dma_alignment); 555 daddr -= dma_offset; 556 len += dma_offset; 557 /* 558 * X100 uses DMA addresses as seen by the card so adding 559 * the aperture base is not required for DMA. However x200 560 * requires DMA addresses to be an offset into the bar so 561 * add the aperture base for x200. 562 */ 563 if (x200) 564 daddr += vpdev->aper->pa; 565 while (len) { 566 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); 567 err = vop_sync_dma(vdev, vvr->buf_da, daddr, 568 ALIGN(partlen, dma_alignment)); 569 if (err) { 570 dev_err(vop_dev(vdev), "%s %d err %d\n", 571 __func__, __LINE__, err); 572 goto err; 573 } 574 if (copy_to_user(ubuf, vvr->buf + dma_offset, 575 partlen - dma_offset)) { 576 err = -EFAULT; 577 dev_err(vop_dev(vdev), "%s %d err %d\n", 578 __func__, __LINE__, err); 579 goto err; 580 } 581 daddr += partlen; 582 ubuf += partlen; 583 dbuf += partlen; 584 vdev->in_bytes_dma += partlen; 585 vdev->in_bytes += partlen; 586 len -= partlen; 587 dma_offset = 0; 588 } 589 err = 0; 590err: 591 vpdev->hw_ops->unmap(vpdev, dbuf); 592 dev_dbg(vop_dev(vdev), 593 "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n", 594 __func__, ubuf, dbuf, len, vr_idx); 595 return err; 596} 597 598/* 599 * Initiates copies across the PCIe bus from a user space buffer to card 600 * memory. When transfers are done using DMA, source/destination addresses 601 * and transfer length must follow the alignment requirements of the MIC 602 * DMA engine. 603 */ 604static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf, 605 size_t len, u64 daddr, size_t dlen, 606 int vr_idx) 607{ 608 struct vop_device *vpdev = vdev->vpdev; 609 void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len); 610 struct vop_vringh *vvr = &vdev->vvr[vr_idx]; 611 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); 612 size_t dma_alignment; 613 bool x200; 614 size_t partlen; 615 bool dma = VOP_USE_DMA && vi->dma_ch; 616 int err = 0; 617 618 if (dma) { 619 dma_alignment = 1 << vi->dma_ch->device->copy_align; 620 x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); 621 622 if (daddr & (dma_alignment - 1)) { 623 vdev->tx_dst_unaligned += len; 624 dma = false; 625 } else if (ALIGN(len, dma_alignment) > dlen) { 626 vdev->tx_len_unaligned += len; 627 dma = false; 628 } 629 } 630 631 if (!dma) 632 goto memcpy; 633 634 /* 635 * X100 uses DMA addresses as seen by the card so adding 636 * the aperture base is not required for DMA. However x200 637 * requires DMA addresses to be an offset into the bar so 638 * add the aperture base for x200. 639 */ 640 if (x200) 641 daddr += vpdev->aper->pa; 642 while (len) { 643 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); 644 645 if (copy_from_user(vvr->buf, ubuf, partlen)) { 646 err = -EFAULT; 647 dev_err(vop_dev(vdev), "%s %d err %d\n", 648 __func__, __LINE__, err); 649 goto err; 650 } 651 err = vop_sync_dma(vdev, daddr, vvr->buf_da, 652 ALIGN(partlen, dma_alignment)); 653 if (err) { 654 dev_err(vop_dev(vdev), "%s %d err %d\n", 655 __func__, __LINE__, err); 656 goto err; 657 } 658 daddr += partlen; 659 ubuf += partlen; 660 dbuf += partlen; 661 vdev->out_bytes_dma += partlen; 662 vdev->out_bytes += partlen; 663 len -= partlen; 664 } 665memcpy: 666 /* 667 * We are copying to IO below and should ideally use something 668 * like copy_from_user_toio(..) if it existed. 669 */ 670 if (copy_from_user((void __force *)dbuf, ubuf, len)) { 671 err = -EFAULT; 672 dev_err(vop_dev(vdev), "%s %d err %d\n", 673 __func__, __LINE__, err); 674 goto err; 675 } 676 vdev->out_bytes += len; 677 err = 0; 678err: 679 vpdev->hw_ops->unmap(vpdev, dbuf); 680 dev_dbg(vop_dev(vdev), 681 "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n", 682 __func__, ubuf, dbuf, len, vr_idx); 683 return err; 684} 685 686#define MIC_VRINGH_READ true 687 688/* Determine the total number of bytes consumed in a VRINGH KIOV */ 689static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov) 690{ 691 int i; 692 u32 total = iov->consumed; 693 694 for (i = 0; i < iov->i; i++) 695 total += iov->iov[i].iov_len; 696 return total; 697} 698 699/* 700 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. 701 * This API is heavily based on the vringh_iov_xfer(..) implementation 702 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) 703 * and vringh_iov_push_kern(..) directly is because there is no 704 * way to override the VRINGH xfer(..) routines as of v3.10. 705 */ 706static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov, 707 void __user *ubuf, size_t len, bool read, int vr_idx, 708 size_t *out_len) 709{ 710 int ret = 0; 711 size_t partlen, tot_len = 0; 712 713 while (len && iov->i < iov->used) { 714 struct kvec *kiov = &iov->iov[iov->i]; 715 unsigned long daddr = (unsigned long)kiov->iov_base; 716 717 partlen = min(kiov->iov_len, len); 718 if (read) 719 ret = vop_virtio_copy_to_user(vdev, ubuf, partlen, 720 daddr, 721 kiov->iov_len, 722 vr_idx); 723 else 724 ret = vop_virtio_copy_from_user(vdev, ubuf, partlen, 725 daddr, 726 kiov->iov_len, 727 vr_idx); 728 if (ret) { 729 dev_err(vop_dev(vdev), "%s %d err %d\n", 730 __func__, __LINE__, ret); 731 break; 732 } 733 len -= partlen; 734 ubuf += partlen; 735 tot_len += partlen; 736 iov->consumed += partlen; 737 kiov->iov_len -= partlen; 738 kiov->iov_base += partlen; 739 if (!kiov->iov_len) { 740 /* Fix up old iov element then increment. */ 741 kiov->iov_len = iov->consumed; 742 kiov->iov_base -= iov->consumed; 743 744 iov->consumed = 0; 745 iov->i++; 746 } 747 } 748 *out_len = tot_len; 749 return ret; 750} 751 752/* 753 * Use the standard VRINGH infrastructure in the kernel to fetch new 754 * descriptors, initiate the copies and update the used ring. 755 */ 756static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy) 757{ 758 int ret = 0; 759 u32 iovcnt = copy->iovcnt; 760 struct iovec iov; 761 struct iovec __user *u_iov = copy->iov; 762 void __user *ubuf = NULL; 763 struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx]; 764 struct vringh_kiov *riov = &vvr->riov; 765 struct vringh_kiov *wiov = &vvr->wiov; 766 struct vringh *vrh = &vvr->vrh; 767 u16 *head = &vvr->head; 768 struct mic_vring *vr = &vvr->vring; 769 size_t len = 0, out_len; 770 771 copy->out_len = 0; 772 /* Fetch a new IOVEC if all previous elements have been processed */ 773 if (riov->i == riov->used && wiov->i == wiov->used) { 774 ret = vringh_getdesc_kern(vrh, riov, wiov, 775 head, GFP_KERNEL); 776 /* Check if there are available descriptors */ 777 if (ret <= 0) 778 return ret; 779 } 780 while (iovcnt) { 781 if (!len) { 782 /* Copy over a new iovec from user space. */ 783 ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); 784 if (ret) { 785 ret = -EINVAL; 786 dev_err(vop_dev(vdev), "%s %d err %d\n", 787 __func__, __LINE__, ret); 788 break; 789 } 790 len = iov.iov_len; 791 ubuf = iov.iov_base; 792 } 793 /* Issue all the read descriptors first */ 794 ret = vop_vringh_copy(vdev, riov, ubuf, len, 795 MIC_VRINGH_READ, copy->vr_idx, &out_len); 796 if (ret) { 797 dev_err(vop_dev(vdev), "%s %d err %d\n", 798 __func__, __LINE__, ret); 799 break; 800 } 801 len -= out_len; 802 ubuf += out_len; 803 copy->out_len += out_len; 804 /* Issue the write descriptors next */ 805 ret = vop_vringh_copy(vdev, wiov, ubuf, len, 806 !MIC_VRINGH_READ, copy->vr_idx, &out_len); 807 if (ret) { 808 dev_err(vop_dev(vdev), "%s %d err %d\n", 809 __func__, __LINE__, ret); 810 break; 811 } 812 len -= out_len; 813 ubuf += out_len; 814 copy->out_len += out_len; 815 if (!len) { 816 /* One user space iovec is now completed */ 817 iovcnt--; 818 u_iov++; 819 } 820 /* Exit loop if all elements in KIOVs have been processed. */ 821 if (riov->i == riov->used && wiov->i == wiov->used) 822 break; 823 } 824 /* 825 * Update the used ring if a descriptor was available and some data was 826 * copied in/out and the user asked for a used ring update. 827 */ 828 if (*head != USHRT_MAX && copy->out_len && copy->update_used) { 829 u32 total = 0; 830 831 /* Determine the total data consumed */ 832 total += vop_vringh_iov_consumed(riov); 833 total += vop_vringh_iov_consumed(wiov); 834 vringh_complete_kern(vrh, *head, total); 835 *head = USHRT_MAX; 836 if (vringh_need_notify_kern(vrh) > 0) 837 vringh_notify(vrh); 838 vringh_kiov_cleanup(riov); 839 vringh_kiov_cleanup(wiov); 840 /* Update avail idx for user space */ 841 vr->info->avail_idx = vrh->last_avail_idx; 842 } 843 return ret; 844} 845 846static inline int vop_verify_copy_args(struct vop_vdev *vdev, 847 struct mic_copy_desc *copy) 848{ 849 if (!vdev || copy->vr_idx >= vdev->dd->num_vq) 850 return -EINVAL; 851 return 0; 852} 853 854/* Copy a specified number of virtio descriptors in a chain */ 855static int vop_virtio_copy_desc(struct vop_vdev *vdev, 856 struct mic_copy_desc *copy) 857{ 858 int err; 859 struct vop_vringh *vvr; 860 861 err = vop_verify_copy_args(vdev, copy); 862 if (err) 863 return err; 864 865 vvr = &vdev->vvr[copy->vr_idx]; 866 mutex_lock(&vvr->vr_mutex); 867 if (!vop_vdevup(vdev)) { 868 err = -ENODEV; 869 dev_err(vop_dev(vdev), "%s %d err %d\n", 870 __func__, __LINE__, err); 871 goto err; 872 } 873 err = _vop_virtio_copy(vdev, copy); 874 if (err) { 875 dev_err(vop_dev(vdev), "%s %d err %d\n", 876 __func__, __LINE__, err); 877 } 878err: 879 mutex_unlock(&vvr->vr_mutex); 880 return err; 881} 882 883static int vop_open(struct inode *inode, struct file *f) 884{ 885 struct vop_vdev *vdev; 886 struct vop_info *vi = container_of(f->private_data, 887 struct vop_info, miscdev); 888 889 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); 890 if (!vdev) 891 return -ENOMEM; 892 vdev->vi = vi; 893 mutex_init(&vdev->vdev_mutex); 894 f->private_data = vdev; 895 init_completion(&vdev->destroy); 896 complete(&vdev->destroy); 897 return 0; 898} 899 900static int vop_release(struct inode *inode, struct file *f) 901{ 902 struct vop_vdev *vdev = f->private_data, *vdev_tmp; 903 struct vop_info *vi = vdev->vi; 904 struct list_head *pos, *tmp; 905 bool found = false; 906 907 mutex_lock(&vdev->vdev_mutex); 908 if (vdev->deleted) 909 goto unlock; 910 mutex_lock(&vi->vop_mutex); 911 list_for_each_safe(pos, tmp, &vi->vdev_list) { 912 vdev_tmp = list_entry(pos, struct vop_vdev, list); 913 if (vdev == vdev_tmp) { 914 vop_virtio_del_device(vdev); 915 list_del(pos); 916 found = true; 917 break; 918 } 919 } 920 mutex_unlock(&vi->vop_mutex); 921unlock: 922 mutex_unlock(&vdev->vdev_mutex); 923 if (!found) 924 wait_for_completion(&vdev->destroy); 925 f->private_data = NULL; 926 kfree(vdev); 927 return 0; 928} 929 930static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 931{ 932 struct vop_vdev *vdev = f->private_data; 933 struct vop_info *vi = vdev->vi; 934 void __user *argp = (void __user *)arg; 935 int ret; 936 937 switch (cmd) { 938 case MIC_VIRTIO_ADD_DEVICE: 939 { 940 struct mic_device_desc dd, *dd_config; 941 942 if (copy_from_user(&dd, argp, sizeof(dd))) 943 return -EFAULT; 944 945 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE || 946 dd.num_vq > MIC_MAX_VRINGS) 947 return -EINVAL; 948 949 dd_config = memdup_user(argp, mic_desc_size(&dd)); 950 if (IS_ERR(dd_config)) 951 return PTR_ERR(dd_config); 952 953 /* Ensure desc has not changed between the two reads */ 954 if (memcmp(&dd, dd_config, sizeof(dd))) { 955 ret = -EINVAL; 956 goto free_ret; 957 } 958 mutex_lock(&vdev->vdev_mutex); 959 mutex_lock(&vi->vop_mutex); 960 ret = vop_virtio_add_device(vdev, dd_config); 961 if (ret) 962 goto unlock_ret; 963 list_add_tail(&vdev->list, &vi->vdev_list); 964unlock_ret: 965 mutex_unlock(&vi->vop_mutex); 966 mutex_unlock(&vdev->vdev_mutex); 967free_ret: 968 kfree(dd_config); 969 return ret; 970 } 971 case MIC_VIRTIO_COPY_DESC: 972 { 973 struct mic_copy_desc copy; 974 975 mutex_lock(&vdev->vdev_mutex); 976 ret = vop_vdev_inited(vdev); 977 if (ret) 978 goto _unlock_ret; 979 980 if (copy_from_user(&copy, argp, sizeof(copy))) { 981 ret = -EFAULT; 982 goto _unlock_ret; 983 } 984 985 ret = vop_virtio_copy_desc(vdev, &copy); 986 if (ret < 0) 987 goto _unlock_ret; 988 if (copy_to_user( 989 &((struct mic_copy_desc __user *)argp)->out_len, 990 &copy.out_len, sizeof(copy.out_len))) 991 ret = -EFAULT; 992_unlock_ret: 993 mutex_unlock(&vdev->vdev_mutex); 994 return ret; 995 } 996 case MIC_VIRTIO_CONFIG_CHANGE: 997 { 998 void *buf; 999 1000 mutex_lock(&vdev->vdev_mutex); 1001 ret = vop_vdev_inited(vdev); 1002 if (ret) 1003 goto __unlock_ret; 1004 buf = memdup_user(argp, vdev->dd->config_len); 1005 if (IS_ERR(buf)) { 1006 ret = PTR_ERR(buf); 1007 goto __unlock_ret; 1008 } 1009 ret = vop_virtio_config_change(vdev, buf); 1010 kfree(buf); 1011__unlock_ret: 1012 mutex_unlock(&vdev->vdev_mutex); 1013 return ret; 1014 } 1015 default: 1016 return -ENOIOCTLCMD; 1017 }; 1018 return 0; 1019} 1020 1021/* 1022 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and 1023 * not when previously enqueued buffers may be available. This means that 1024 * in the card->host (TX) path, when userspace is unblocked by poll it 1025 * must drain all available descriptors or it can stall. 1026 */ 1027static __poll_t vop_poll(struct file *f, poll_table *wait) 1028{ 1029 struct vop_vdev *vdev = f->private_data; 1030 __poll_t mask = 0; 1031 1032 mutex_lock(&vdev->vdev_mutex); 1033 if (vop_vdev_inited(vdev)) { 1034 mask = EPOLLERR; 1035 goto done; 1036 } 1037 poll_wait(f, &vdev->waitq, wait); 1038 if (vop_vdev_inited(vdev)) { 1039 mask = EPOLLERR; 1040 } else if (vdev->poll_wake) { 1041 vdev->poll_wake = 0; 1042 mask = EPOLLIN | EPOLLOUT; 1043 } 1044done: 1045 mutex_unlock(&vdev->vdev_mutex); 1046 return mask; 1047} 1048 1049static inline int 1050vop_query_offset(struct vop_vdev *vdev, unsigned long offset, 1051 unsigned long *size, unsigned long *pa) 1052{ 1053 struct vop_device *vpdev = vdev->vpdev; 1054 unsigned long start = MIC_DP_SIZE; 1055 int i; 1056 1057 /* 1058 * MMAP interface is as follows: 1059 * offset region 1060 * 0x0 virtio device_page 1061 * 0x1000 first vring 1062 * 0x1000 + size of 1st vring second vring 1063 * .... 1064 */ 1065 if (!offset) { 1066 *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev)); 1067 *size = MIC_DP_SIZE; 1068 return 0; 1069 } 1070 1071 for (i = 0; i < vdev->dd->num_vq; i++) { 1072 struct vop_vringh *vvr = &vdev->vvr[i]; 1073 1074 if (offset == start) { 1075 *pa = virt_to_phys(vvr->vring.va); 1076 *size = vvr->vring.len; 1077 return 0; 1078 } 1079 start += vvr->vring.len; 1080 } 1081 return -1; 1082} 1083 1084/* 1085 * Maps the device page and virtio rings to user space for readonly access. 1086 */ 1087static int vop_mmap(struct file *f, struct vm_area_struct *vma) 1088{ 1089 struct vop_vdev *vdev = f->private_data; 1090 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 1091 unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; 1092 int i, err; 1093 1094 err = vop_vdev_inited(vdev); 1095 if (err) 1096 goto ret; 1097 if (vma->vm_flags & VM_WRITE) { 1098 err = -EACCES; 1099 goto ret; 1100 } 1101 while (size_rem) { 1102 i = vop_query_offset(vdev, offset, &size, &pa); 1103 if (i < 0) { 1104 err = -EINVAL; 1105 goto ret; 1106 } 1107 err = remap_pfn_range(vma, vma->vm_start + offset, 1108 pa >> PAGE_SHIFT, size, 1109 vma->vm_page_prot); 1110 if (err) 1111 goto ret; 1112 size_rem -= size; 1113 offset += size; 1114 } 1115ret: 1116 return err; 1117} 1118 1119static const struct file_operations vop_fops = { 1120 .open = vop_open, 1121 .release = vop_release, 1122 .unlocked_ioctl = vop_ioctl, 1123 .poll = vop_poll, 1124 .mmap = vop_mmap, 1125 .owner = THIS_MODULE, 1126}; 1127 1128int vop_host_init(struct vop_info *vi) 1129{ 1130 int rc; 1131 struct miscdevice *mdev; 1132 struct vop_device *vpdev = vi->vpdev; 1133 1134 INIT_LIST_HEAD(&vi->vdev_list); 1135 vi->dma_ch = vpdev->dma_ch; 1136 mdev = &vi->miscdev; 1137 mdev->minor = MISC_DYNAMIC_MINOR; 1138 snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index); 1139 mdev->name = vi->name; 1140 mdev->fops = &vop_fops; 1141 mdev->parent = &vpdev->dev; 1142 1143 rc = misc_register(mdev); 1144 if (rc) 1145 dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc); 1146 return rc; 1147} 1148 1149void vop_host_uninit(struct vop_info *vi) 1150{ 1151 struct list_head *pos, *tmp; 1152 struct vop_vdev *vdev; 1153 1154 mutex_lock(&vi->vop_mutex); 1155 vop_virtio_reset_devices(vi); 1156 list_for_each_safe(pos, tmp, &vi->vdev_list) { 1157 vdev = list_entry(pos, struct vop_vdev, list); 1158 list_del(pos); 1159 reinit_completion(&vdev->destroy); 1160 mutex_unlock(&vi->vop_mutex); 1161 mutex_lock(&vdev->vdev_mutex); 1162 vop_virtio_del_device(vdev); 1163 vdev->deleted = true; 1164 mutex_unlock(&vdev->vdev_mutex); 1165 complete(&vdev->destroy); 1166 mutex_lock(&vi->vop_mutex); 1167 } 1168 mutex_unlock(&vi->vop_mutex); 1169 misc_deregister(&vi->miscdev); 1170}