Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/misc/mic/scif/scif_dma.c at v5.1-rc3
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include "scif_main.h"
#include "scif_map.h"

/*
 * struct scif_dma_comp_cb - SCIF DMA completion callback
 *
 * @dma_completion_func: DMA completion callback
 * @cb_cookie: DMA completion callback cookie
 * @temp_buf: Temporary buffer
 * @temp_buf_to_free: Temporary buffer to be freed
 * @is_cache: Is a kmem_cache allocated buffer
 * @dst_offset: Destination registration offset
 * @dst_window: Destination registration window
 * @len: Length of the temp buffer
 * @temp_phys: DMA address of the temp buffer
 * @sdev: The SCIF device
 * @header_padding: padding for cache line alignment
 */
struct scif_dma_comp_cb {
	void (*dma_completion_func)(void *cookie);
	void *cb_cookie;
	u8 *temp_buf;
	u8 *temp_buf_to_free;
	bool is_cache;
	s64 dst_offset;
	struct scif_window *dst_window;
	size_t len;
	dma_addr_t temp_phys;
	struct scif_dev *sdev;
	int header_padding;
};

/**
 * struct scif_copy_work - Work for DMA copy
 *
 * @src_offset: Starting source offset
 * @dst_offset: Starting destination offset
 * @src_window: Starting src registered window
 * @dst_window: Starting dst registered window
 * @loopback: true if this is a loopback DMA transfer
 * @len: Length of the transfer
 * @comp_cb: DMA copy completion callback
 * @remote_dev: The remote SCIF peer device
 * @fence_type: polling or interrupt based
 * @ordered: is this a tail byte ordered DMA transfer
 */
struct scif_copy_work {
	s64 src_offset;
	s64 dst_offset;
	struct scif_window *src_window;
	struct scif_window *dst_window;
	int loopback;
	size_t len;
	struct scif_dma_comp_cb *comp_cb;
	struct scif_dev *remote_dev;
	int fence_type;
	bool ordered;
};

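/*
 * DMA channels are handed out to endpoints round-robin via
 * scifdev->dma_ch_idx in scif_reserve_dma_chan() below. A loopback
 * endpoint on the management node gets no channel at all, since such
 * copies are performed by the CPU (see scif_rma_copy()).
 */
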
/**
 * scif_reserve_dma_chan:
 * @ep: Endpoint Descriptor.
 *
 * This routine reserves a DMA channel for a particular
 * endpoint. All DMA transfers for an endpoint are always
 * programmed on the same DMA channel.
 */
int scif_reserve_dma_chan(struct scif_endpt *ep)
{
	int err = 0;
	struct scif_dev *scifdev;
	struct scif_hw_dev *sdev;
	struct dma_chan *chan;

	/* Loopback DMAs are not supported on the management node */
	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
		return 0;
	if (scif_info.nodeid)
		scifdev = &scif_dev[0];
	else
		scifdev = ep->remote_dev;
	sdev = scifdev->sdev;
	if (!sdev->num_dma_ch)
		return -ENODEV;
	chan = sdev->dma_ch[scifdev->dma_ch_idx];
	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
	mutex_lock(&ep->rma_info.rma_lock);
	ep->rma_info.dma_chan = chan;
	mutex_unlock(&ep->rma_info.rma_lock);
	return err;
}

#ifdef CONFIG_MMU_NOTIFIER
/**
 * __scif_rma_destroy_tcw:
 *
 * This routine destroys temporary cached windows
 */
static
void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
			    u64 start, u64 len)
{
	struct list_head *item, *tmp;
	struct scif_window *window;
	u64 start_va, end_va;
	u64 end = start + len;

	if (end <= start)
		return;

	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
		window = list_entry(item, struct scif_window, list);
		if (!len)
			break;
		start_va = window->va_for_temp;
		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
		if (start < start_va && end <= start_va)
			break;
		if (start >= end_va)
			continue;
		__scif_rma_destroy_tcw_helper(window);
	}
}

static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
{
	struct scif_endpt *ep = mmn->ep;

	spin_lock(&ep->rma_info.tc_lock);
	__scif_rma_destroy_tcw(mmn, start, len);
	spin_unlock(&ep->rma_info.tc_lock);
}

static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
{
	struct list_head *item, *tmp;
	struct scif_mmu_notif *mmn;

	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
		mmn = list_entry(item, struct scif_mmu_notif, list);
		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
	}
}

static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
{
	struct list_head *item, *tmp;
	struct scif_mmu_notif *mmn;

	spin_lock(&ep->rma_info.tc_lock);
	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
		mmn = list_entry(item, struct scif_mmu_notif, list);
		__scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
	}
	spin_unlock(&ep->rma_info.tc_lock);
}

static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
{
	if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
		return false;
	if ((atomic_read(&ep->rma_info.tcw_total_pages)
	     + (cur_bytes >> PAGE_SHIFT)) > scif_info.rma_tc_limit) {
		dev_info(scif_info.mdev.this_device,
			 "%s %d total=%d, current=%zu reached max\n",
			 __func__, __LINE__,
			 atomic_read(&ep->rma_info.tcw_total_pages),
			 (1 + (cur_bytes >> PAGE_SHIFT)));
		scif_rma_destroy_tcw_invalid();
		__scif_rma_destroy_tcw_ep(ep);
	}
	return true;
}

static void scif_mmu_notifier_release(struct mmu_notifier *mn,
				      struct mm_struct *mm)
{
	struct scif_mmu_notif *mmn;

	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
	schedule_work(&scif_info.misc_work);
}

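/*
 * MMU notifier callbacks: when a mapping in the registering process
 * changes, or the process exits, any temporary cached windows that
 * overlap the affected VA range must be torn down so that stale pages
 * are not used as DMA targets.
 */
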
static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
				const struct mmu_notifier_range *range)
{
	struct scif_mmu_notif *mmn;

	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
	scif_rma_destroy_tcw(mmn, range->start, range->end - range->start);

	return 0;
}

static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
			const struct mmu_notifier_range *range)
{
	/*
	 * Nothing to do here, everything needed was done in
	 * invalidate_range_start.
	 */
}

static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
	.release = scif_mmu_notifier_release,
	.clear_flush_young = NULL,
	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};

static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
{
	struct scif_endpt_rma_info *rma = &ep->rma_info;
	struct scif_mmu_notif *mmn = NULL;
	struct list_head *item, *tmp;

	mutex_lock(&ep->rma_info.mmn_lock);
	list_for_each_safe(item, tmp, &rma->mmn_list) {
		mmn = list_entry(item, struct scif_mmu_notif, list);
		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
		list_del(item);
		kfree(mmn);
	}
	mutex_unlock(&ep->rma_info.mmn_lock);
}

static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
				   struct mm_struct *mm, struct scif_endpt *ep)
{
	mmn->ep = ep;
	mmn->mm = mm;
	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
	INIT_LIST_HEAD(&mmn->list);
	INIT_LIST_HEAD(&mmn->tc_reg_list);
}

static struct scif_mmu_notif *
scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
{
	struct scif_mmu_notif *mmn;

	list_for_each_entry(mmn, &rma->mmn_list, list)
		if (mmn->mm == mm)
			return mmn;
	return NULL;
}

static struct scif_mmu_notif *
scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
{
	struct scif_mmu_notif *mmn
		= kzalloc(sizeof(*mmn), GFP_KERNEL);

	if (!mmn)
		return ERR_PTR(-ENOMEM);

	scif_init_mmu_notifier(mmn, current->mm, ep);
	if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
		kfree(mmn);
		return ERR_PTR(-EBUSY);
	}
	list_add(&mmn->list, &ep->rma_info.mmn_list);
	return mmn;
}

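/*
 * Endpoints that need notifier/cached-window cleanup are queued on
 * scif_info.mmu_notif_cleanup and handled from the misc work thread
 * via scif_mmu_notif_handler() below.
 */
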
/*
 * Called from the misc thread to destroy temporary cached windows and
 * unregister the MMU notifier for the SCIF endpoint.
 */
void scif_mmu_notif_handler(struct work_struct *work)
{
	struct list_head *pos, *tmpq;
	struct scif_endpt *ep;
restart:
	scif_rma_destroy_tcw_invalid();
	spin_lock(&scif_info.rmalock);
	list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
		ep = list_entry(pos, struct scif_endpt, mmu_list);
		list_del(&ep->mmu_list);
		spin_unlock(&scif_info.rmalock);
		scif_rma_destroy_tcw_ep(ep);
		scif_ep_unregister_mmu_notifier(ep);
		goto restart;
	}
	spin_unlock(&scif_info.rmalock);
}

static bool scif_is_set_reg_cache(int flags)
{
	return !!(flags & SCIF_RMA_USECACHE);
}
#else
static struct scif_mmu_notif *
scif_find_mmu_notifier(struct mm_struct *mm,
		       struct scif_endpt_rma_info *rma)
{
	return NULL;
}

static struct scif_mmu_notif *
scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
{
	return NULL;
}

void scif_mmu_notif_handler(struct work_struct *work)
{
}

static bool scif_is_set_reg_cache(int flags)
{
	return false;
}

static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
{
	return false;
}
#endif

/**
 * scif_register_temp:
 * @epd: End Point Descriptor.
 * @addr: virtual address to/from which to copy
 * @len: length of range to copy
 * @prot: read/write protections for the pinned pages
 * @out_offset: computed offset returned by reference.
 * @out_window: allocated registered window returned by reference.
 *
 * Create a temporary registered window. The peer will not know about this
 * window. This API is used for the scif_vreadfrom()/scif_vwriteto() APIs.
 */
static int
scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
		   off_t *out_offset, struct scif_window **out_window)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err;
	scif_pinned_pages_t pinned_pages;
	size_t aligned_len;

	aligned_len = ALIGN(len, PAGE_SIZE);

	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
			       aligned_len, &prot, 0, &pinned_pages);
	if (err)
		return err;

	pinned_pages->prot = prot;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, 0, 0,
				     aligned_len >> PAGE_SHIFT,
				     (s64 *)out_offset);
	if (err)
		goto error_unpin;

	/* Allocate and prepare self registration window */
	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
					 *out_offset, true);
	if (!*out_window) {
		scif_free_window_offset(ep, NULL, *out_offset);
		err = -ENOMEM;
		goto error_unpin;
	}

	(*out_window)->pinned_pages = pinned_pages;
	(*out_window)->nr_pages = pinned_pages->nr_pages;
	(*out_window)->prot = pinned_pages->prot;

	(*out_window)->va_for_temp = addr & PAGE_MASK;
	err = scif_map_window(ep->remote_dev, *out_window);
	if (err) {
		/* Something went wrong! Rollback */
		scif_destroy_window(ep, *out_window);
		*out_window = NULL;
	} else {
		*out_offset |= (addr - (*out_window)->va_for_temp);
	}
	return err;
error_unpin:
	if (err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
	scif_unpin_pages(pinned_pages);
	return err;
}

#define SCIF_DMA_TO (3 * HZ)

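/*
 * Draining a channel works by submitting a zero-length memcpy
 * descriptor: with DMA_PREP_FENCE set, that descriptor may not start
 * until everything submitted before it has completed, so its
 * completion implies the channel is idle. scif_sync_dma() polls for
 * that completion while scif_async_dma() blocks on an interrupt
 * callback with a timeout.
 */
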
/*
 * scif_sync_dma - Program a DMA without an interrupt descriptor
 *
 * @sdev - The SCIF device used for DMA registration.
 * @chan - DMA channel to be used.
 * @sync_wait: Wait for DMA to complete?
 *
 * Return 0 on success and -errno on error.
 */
static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
			 bool sync_wait)
{
	int err = 0;
	struct dma_async_tx_descriptor *tx = NULL;
	enum dma_ctrl_flags flags = DMA_PREP_FENCE;
	dma_cookie_t cookie;
	struct dma_device *ddev;

	if (!chan) {
		err = -EIO;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		return err;
	}
	ddev = chan->device;

	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
	if (!tx) {
		err = -ENOMEM;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
	cookie = tx->tx_submit(tx);

	if (dma_submit_error(cookie)) {
		err = -ENOMEM;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
	if (!sync_wait) {
		dma_async_issue_pending(chan);
	} else {
		if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
			err = 0;
		} else {
			err = -EIO;
			dev_err(&sdev->dev, "%s %d err %d\n",
				__func__, __LINE__, err);
		}
	}
release:
	return err;
}

static void scif_dma_callback(void *arg)
{
	struct completion *done = (struct completion *)arg;

	complete(done);
}

#define SCIF_DMA_SYNC_WAIT true
#define SCIF_DMA_POLL BIT(0)
#define SCIF_DMA_INTR BIT(1)

/*
 * scif_async_dma - Program a DMA with an interrupt descriptor
 *
 * @sdev - The SCIF device used for DMA registration.
 * @chan - DMA channel to be used.
 * Return 0 on success and -errno on error.
 */
static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx = NULL;
	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
	DECLARE_COMPLETION_ONSTACK(done_wait);
	dma_cookie_t cookie;
	enum dma_status status;

	if (!chan) {
		err = -EIO;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		return err;
	}
	ddev = chan->device;

	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
	if (!tx) {
		err = -ENOMEM;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
	reinit_completion(&done_wait);
	tx->callback = scif_dma_callback;
	tx->callback_param = &done_wait;
	cookie = tx->tx_submit(tx);

	if (dma_submit_error(cookie)) {
		err = -ENOMEM;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
	dma_async_issue_pending(chan);

	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
	if (!err) {
		err = -EIO;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
	err = 0;
	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
	if (status != DMA_COMPLETE) {
		err = -EIO;
		dev_err(&sdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
		goto release;
	}
release:
	return err;
}

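/*
 * Example (sketch): after queueing the last chunk of a transfer,
 * scif_rma_copy() fences the channel with one of the drain helpers
 * below, e.g.:
 *
 *	err = scif_drain_dma_poll(rdev->sdev, ep->rma_info.dma_chan);
 *
 * which returns only once every descriptor previously submitted on
 * that channel has completed.
 */
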
/*
 * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
 * DMA channel via polling.
 *
 * @sdev - The SCIF device
 * @chan - DMA channel
 * Return 0 on success and -errno on error.
 */
static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
{
	if (!chan)
		return -EINVAL;
	return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
}

/*
 * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
 * DMA channel via interrupt based blocking wait.
 *
 * @sdev - The SCIF device
 * @chan - DMA channel
 * Return 0 on success and -errno on error.
 */
int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
{
	if (!chan)
		return -EINVAL;
	return scif_async_dma(sdev, chan);
}

/**
 * scif_rma_destroy_windows:
 *
 * This routine destroys all windows queued for cleanup
 */
void scif_rma_destroy_windows(void)
{
	struct list_head *item, *tmp;
	struct scif_window *window;
	struct scif_endpt *ep;
	struct dma_chan *chan;

	might_sleep();
restart:
	spin_lock(&scif_info.rmalock);
	list_for_each_safe(item, tmp, &scif_info.rma) {
		window = list_entry(item, struct scif_window, list);
		ep = (struct scif_endpt *)window->ep;
		chan = ep->rma_info.dma_chan;

		list_del_init(&window->list);
		spin_unlock(&scif_info.rmalock);
		if (!chan || !scifdev_alive(ep) ||
		    !scif_drain_dma_intr(ep->remote_dev->sdev,
					 ep->rma_info.dma_chan))
			/* Remove window from global list */
			window->unreg_state = OP_COMPLETED;
		else
			dev_warn(&ep->remote_dev->sdev->dev,
				 "DMA engine hung?\n");
		if (window->unreg_state == OP_COMPLETED) {
			if (window->type == SCIF_WINDOW_SELF)
				scif_destroy_window(ep, window);
			else
				scif_destroy_remote_window(window);
			atomic_dec(&ep->rma_info.tw_refcount);
		}
		goto restart;
	}
	spin_unlock(&scif_info.rmalock);
}

/**
 * scif_rma_destroy_tcw_invalid:
 *
 * This routine destroys temporary cached registered windows
 * which have been queued for cleanup.
 */
void scif_rma_destroy_tcw_invalid(void)
{
	struct list_head *item, *tmp;
	struct scif_window *window;
	struct scif_endpt *ep;
	struct dma_chan *chan;

	might_sleep();
restart:
	spin_lock(&scif_info.rmalock);
	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
		window = list_entry(item, struct scif_window, list);
		ep = (struct scif_endpt *)window->ep;
		chan = ep->rma_info.dma_chan;
		list_del_init(&window->list);
		spin_unlock(&scif_info.rmalock);
		mutex_lock(&ep->rma_info.rma_lock);
		if (!chan || !scifdev_alive(ep) ||
		    !scif_drain_dma_intr(ep->remote_dev->sdev,
					 ep->rma_info.dma_chan)) {
			atomic_sub(window->nr_pages,
				   &ep->rma_info.tcw_total_pages);
			scif_destroy_window(ep, window);
			atomic_dec(&ep->rma_info.tcw_refcount);
		} else {
			dev_warn(&ep->remote_dev->sdev->dev,
				 "DMA engine hung?\n");
		}
		mutex_unlock(&ep->rma_info.rma_lock);
		goto restart;
	}
	spin_unlock(&scif_info.rmalock);
}

static inline
void *_get_local_va(off_t off, struct scif_window *window, size_t len)
{
	int page_nr = (off - window->offset) >> PAGE_SHIFT;
	off_t page_off = off & ~PAGE_MASK;
	void *va = NULL;

	if (window->type == SCIF_WINDOW_SELF) {
		struct page **pages = window->pinned_pages->pages;

		va = page_address(pages[page_nr]) + page_off;
	}
	return va;
}

static inline
void *ioremap_remote(off_t off, struct scif_window *window,
		     size_t len, struct scif_dev *dev,
		     struct scif_window_iter *iter)
{
	dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);

	/*
	 * If the DMA address is not card relative then we need the DMA
	 * addresses to be an offset into the bar. The aperture base was
	 * already added so subtract it here since scif_ioremap is going
	 * to add it again.
	 */
	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
	    dev->sdev->aper && !dev->sdev->card_rel_da)
		phys = phys - dev->sdev->aper->pa;
	return scif_ioremap(phys, len, dev);
}

static inline void
iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
{
	scif_iounmap(virt, size, work->remote_dev);
}

/*
 * Takes care of ordering issues caused by
 * 1. Hardware: Only in the case of cpu copy from mgmt node to card
 *    because of WC memory.
 * 2. Software: If memcpy reorders copy instructions for optimization.
 *    This could happen at both mgmt node and card.
 */
static inline void
scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
{
	if (!count)
		return;

	memcpy_toio((void __iomem __force *)dst, src, --count);
	/* Order the last byte with the previous stores */
	wmb();
	*(dst + count) = *(src + count);
}

static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
					   size_t count, bool ordered)
{
	if (ordered)
		scif_ordered_memcpy_toio(dst, src, count);
	else
		memcpy_toio((void __iomem __force *)dst, src, count);
}

static inline
void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
{
	if (!count)
		return;

	memcpy_fromio(dst, (void __iomem __force *)src, --count);
	/* Order the last byte with the previous loads */
	rmb();
	*(dst + count) = *(src + count);
}

static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
					     size_t count, bool ordered)
{
	if (ordered)
		scif_ordered_memcpy_fromio(dst, src, count);
	else
		memcpy_fromio(dst, (void __iomem __force *)src, count);
}

#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)

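/*
 * Worked example for scif_off_to_dma_addr() below (assuming 4 KiB
 * pages): for a fully contiguous window registered at offset 0x10000,
 * off = 0x10800 yields page_nr 0, so the result is dma_addr[0] | 0x800
 * and *nr_bytes = PAGE_SIZE - 0x800. Windows made up of variable sized
 * contiguous chunks walk the chunk list instead; the optional iterator
 * caches the last chunk and its start offset so that sequential
 * lookups do not rescan from the beginning.
 */
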
/*
 * scif_off_to_dma_addr:
 * Obtain the dma_addr given the window and the offset.
 * @window: Registered window.
 * @off: Window offset.
 * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
 * @iter: Returns the index of the dma_addr array found and the start
 *	  offset of that index, for faster lookups on the next iteration.
 *
 * The nr_bytes gives the caller an estimate of the maximum possible
 * DMA transfer, while the iterator's index/start offset provide faster
 * lookups for the next iteration.
 */
dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
				size_t *nr_bytes, struct scif_window_iter *iter)
{
	int i, page_nr;
	s64 start, end;
	off_t page_off;

	if (window->nr_pages == window->nr_contig_chunks) {
		page_nr = (off - window->offset) >> PAGE_SHIFT;
		page_off = off & ~PAGE_MASK;

		if (nr_bytes)
			*nr_bytes = PAGE_SIZE - page_off;
		return window->dma_addr[page_nr] | page_off;
	}
	if (iter) {
		i = iter->index;
		start = iter->offset;
	} else {
		i = 0;
		start = window->offset;
	}
	for (; i < window->nr_contig_chunks; i++) {
		end = start + (window->num_pages[i] << PAGE_SHIFT);
		if (off >= start && off < end) {
			if (iter) {
				iter->index = i;
				iter->offset = start;
			}
			if (nr_bytes)
				*nr_bytes = end - off;
			return (window->dma_addr[i] + (off - start));
		}
		start += (window->num_pages[i] << PAGE_SHIFT);
	}
	dev_err(scif_info.mdev.this_device,
		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
		__func__, __LINE__, window, off);
	return SCIF_RMA_ERROR_CODE;
}

/*
 * Copy between rma window and temporary buffer
 */
static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
				    u8 *temp, size_t rem_len, bool to_temp)
{
	void *window_virt;
	size_t loop_len;
	int offset_in_page;
	s64 end_offset;

	offset_in_page = offset & ~PAGE_MASK;
	loop_len = PAGE_SIZE - offset_in_page;

	if (rem_len < loop_len)
		loop_len = rem_len;

	window_virt = _get_local_va(offset, window, loop_len);
	if (!window_virt)
		return;
	if (to_temp)
		memcpy(temp, window_virt, loop_len);
	else
		memcpy(window_virt, temp, loop_len);

	offset += loop_len;
	temp += loop_len;
	rem_len -= loop_len;

	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);
	while (rem_len) {
		if (offset == end_offset) {
			window = list_next_entry(window, list);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
		loop_len = min(PAGE_SIZE, rem_len);
		window_virt = _get_local_va(offset, window, loop_len);
		if (!window_virt)
			return;
		if (to_temp)
			memcpy(temp, window_virt, loop_len);
		else
			memcpy(window_virt, temp, loop_len);
		offset += loop_len;
		temp += loop_len;
		rem_len -= loop_len;
	}
}

/**
 * scif_rma_completion_cb:
 * @data: RMA cookie
 *
 * RMA interrupt completion callback.
 */
static void scif_rma_completion_cb(void *data)
{
	struct scif_dma_comp_cb *comp_cb = data;

	/* Free DMA Completion CB. */
	if (comp_cb->dst_window)
		scif_rma_local_cpu_copy(comp_cb->dst_offset,
					comp_cb->dst_window,
					comp_cb->temp_buf +
					comp_cb->header_padding,
					comp_cb->len, false);
	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
	if (comp_cb->is_cache)
		kmem_cache_free(unaligned_cache,
				comp_cb->temp_buf_to_free);
	else
		kfree(comp_cb->temp_buf_to_free);
}

/* Copies between temporary buffer and offsets provided in work */
static int
scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
				 u8 *temp, struct dma_chan *chan,
				 bool src_local)
{
	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
	dma_addr_t window_dma_addr, temp_dma_addr;
	dma_addr_t temp_phys = comp_cb->temp_phys;
	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
	int offset_in_ca, ret = 0;
	s64 end_offset, offset;
	struct scif_window *window;
	void *window_virt_addr;
	size_t tail_len;
	struct dma_async_tx_descriptor *tx;
	struct dma_device *dev = chan->device;
	dma_cookie_t cookie;

	if (src_local) {
		offset = work->dst_offset;
		window = work->dst_window;
	} else {
		offset = work->src_offset;
		window = work->src_window;
	}

	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
	if (offset_in_ca) {
		loop_len = L1_CACHE_BYTES - offset_in_ca;
		loop_len = min(loop_len, remaining_len);
		window_virt_addr = ioremap_remote(offset, window,
						  loop_len,
						  work->remote_dev,
						  NULL);
		if (!window_virt_addr)
			return -ENOMEM;
		if (src_local)
			scif_unaligned_cpy_toio(window_virt_addr, temp,
						loop_len,
						work->ordered &&
						!(remaining_len - loop_len));
		else
			scif_unaligned_cpy_fromio(temp, window_virt_addr,
						  loop_len, work->ordered &&
						  !(remaining_len - loop_len));
		iounmap_remote(window_virt_addr, loop_len, work);

		offset += loop_len;
		temp += loop_len;
		temp_phys += loop_len;
		remaining_len -= loop_len;
	}

	offset_in_ca = offset & ~PAGE_MASK;
	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);

	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (offset == end_offset) {
			window = list_next_entry(window, list);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
		if (scif_is_mgmt_node())
			temp_dma_addr = temp_phys;
		else
			/* Fix if we ever enable IOMMU on the card */
			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
		window_dma_addr = scif_off_to_dma_addr(window, offset,
						       &nr_contig_bytes,
						       NULL);
		loop_len = min(nr_contig_bytes, remaining_len);
		if (src_local) {
			if (work->ordered && !tail_len &&
			    !(remaining_len - loop_len) &&
			    loop_len != L1_CACHE_BYTES) {
				/*
				 * Break up the last chunk of the transfer into
				 * two steps if there is no tail, to guarantee
				 * DMA ordering. SCIF_DMA_POLLING inserts
				 * a status update descriptor in step 1 which
				 * acts as a double sided synchronization fence
				 * for the DMA engine to ensure that the last
				 * cache line in step 2 is updated last.
				 */
				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
				tx =
				dev->device_prep_dma_memcpy(chan,
							    window_dma_addr,
							    temp_dma_addr,
							    loop_len -
							    L1_CACHE_BYTES,
							    DMA_PREP_FENCE);
				if (!tx) {
					ret = -ENOMEM;
					goto err;
				}
				cookie = tx->tx_submit(tx);
				if (dma_submit_error(cookie)) {
					ret = -ENOMEM;
					goto err;
				}
				dma_async_issue_pending(chan);
				offset += (loop_len - L1_CACHE_BYTES);
				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
				window_dma_addr += (loop_len - L1_CACHE_BYTES);
				remaining_len -= (loop_len - L1_CACHE_BYTES);
				loop_len = remaining_len;

				/* Step 2) DMA: L1_CACHE_BYTES */
				tx =
				dev->device_prep_dma_memcpy(chan,
							    window_dma_addr,
							    temp_dma_addr,
							    loop_len, 0);
				if (!tx) {
					ret = -ENOMEM;
					goto err;
				}
				cookie = tx->tx_submit(tx);
				if (dma_submit_error(cookie)) {
					ret = -ENOMEM;
					goto err;
				}
				dma_async_issue_pending(chan);
			} else {
				tx =
				dev->device_prep_dma_memcpy(chan,
							    window_dma_addr,
							    temp_dma_addr,
							    loop_len, 0);
				if (!tx) {
					ret = -ENOMEM;
					goto err;
				}
				cookie = tx->tx_submit(tx);
				if (dma_submit_error(cookie)) {
					ret = -ENOMEM;
					goto err;
				}
				dma_async_issue_pending(chan);
			}
		} else {
			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
					window_dma_addr, loop_len, 0);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			dma_async_issue_pending(chan);
		}
		offset += loop_len;
		temp += loop_len;
		temp_phys += loop_len;
		remaining_len -= loop_len;
		offset_in_ca = 0;
	}
	if (tail_len) {
		if (offset == end_offset) {
			window = list_next_entry(window, list);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
		window_virt_addr = ioremap_remote(offset, window, tail_len,
						  work->remote_dev,
						  NULL);
		if (!window_virt_addr)
			return -ENOMEM;
		/*
		 * The CPU copy for the tail bytes must be initiated only once
		 * previous DMA transfers for this endpoint have completed
		 * to guarantee ordering.
		 */
		if (work->ordered) {
			struct scif_dev *rdev = work->remote_dev;

			ret = scif_drain_dma_intr(rdev->sdev, chan);
			if (ret)
				return ret;
		}
		if (src_local)
			scif_unaligned_cpy_toio(window_virt_addr, temp,
						tail_len, work->ordered);
		else
			scif_unaligned_cpy_fromio(temp, window_virt_addr,
						  tail_len, work->ordered);
		iounmap_remote(window_virt_addr, tail_len, work);
	}
	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
	if (!tx) {
		ret = -ENOMEM;
		return ret;
	}
	tx->callback = &scif_rma_completion_cb;
	tx->callback_param = comp_cb;
	cookie = tx->tx_submit(tx);

	if (dma_submit_error(cookie)) {
		ret = -ENOMEM;
		return ret;
	}
	dma_async_issue_pending(chan);
	return 0;
err:
	dev_err(scif_info.mdev.this_device,
		"%s %d Desc Prog Failed ret %d\n",
		__func__, __LINE__, ret);
	return ret;
}

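/*
 * In both DMA paths an ordered transfer ends with a two step sequence:
 * the body of the final chunk is queued with DMA_PREP_FENCE so that
 * the separately queued last cache line (last byte, in
 * _scif_rma_list_dma_copy_aligned() below) may only be written once
 * the body has completed, guaranteeing that the tail lands last.
 */
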
/*
 * _scif_rma_list_dma_copy_aligned:
 *
 * Traverse all the windows and perform DMA copy.
 */
static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
					   struct dma_chan *chan)
{
	dma_addr_t src_dma_addr, dst_dma_addr;
	size_t loop_len, remaining_len, src_contig_bytes = 0;
	size_t dst_contig_bytes = 0;
	struct scif_window_iter src_win_iter;
	struct scif_window_iter dst_win_iter;
	s64 end_src_offset, end_dst_offset;
	struct scif_window *src_window = work->src_window;
	struct scif_window *dst_window = work->dst_window;
	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
	int ret = 0;
	struct dma_async_tx_descriptor *tx;
	struct dma_device *dev = chan->device;
	dma_cookie_t cookie;

	remaining_len = work->len;

	scif_init_window_iter(src_window, &src_win_iter);
	scif_init_window_iter(dst_window, &dst_win_iter);
	end_src_offset = src_window->offset +
		(src_window->nr_pages << PAGE_SHIFT);
	end_dst_offset = dst_window->offset +
		(dst_window->nr_pages << PAGE_SHIFT);
	while (remaining_len) {
		if (src_offset == end_src_offset) {
			src_window = list_next_entry(src_window, list);
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			scif_init_window_iter(src_window, &src_win_iter);
		}
		if (dst_offset == end_dst_offset) {
			dst_window = list_next_entry(dst_window, list);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			scif_init_window_iter(dst_window, &dst_win_iter);
		}

		/* compute dma addresses for transfer */
		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
						    &src_contig_bytes,
						    &src_win_iter);
		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
						    &dst_contig_bytes,
						    &dst_win_iter);
		loop_len = min(src_contig_bytes, dst_contig_bytes);
		loop_len = min(loop_len, remaining_len);
		if (work->ordered && !(remaining_len - loop_len)) {
			/*
			 * Break up the last chunk of the transfer into two
			 * steps to ensure that the last byte in step 2 is
			 * updated last.
			 */
			/* Step 1) DMA: Body Length - 1 */
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
							 src_dma_addr,
							 loop_len - 1,
							 DMA_PREP_FENCE);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			src_offset += (loop_len - 1);
			dst_offset += (loop_len - 1);
			src_dma_addr += (loop_len - 1);
			dst_dma_addr += (loop_len - 1);
			remaining_len -= (loop_len - 1);
			loop_len = remaining_len;

			/* Step 2) DMA: 1 BYTE */
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
						src_dma_addr, loop_len, 0);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			dma_async_issue_pending(chan);
		} else {
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
						src_dma_addr, loop_len, 0);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
		}
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}
	return ret;
err:
	dev_err(scif_info.mdev.this_device,
		"%s %d Desc Prog Failed ret %d\n",
		__func__, __LINE__, ret);
	return ret;
}

/*
 * scif_rma_list_dma_copy_aligned:
 *
 * Traverse all the windows and perform DMA copy.
 */
static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
					  struct dma_chan *chan)
{
	dma_addr_t src_dma_addr, dst_dma_addr;
	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
	size_t dst_contig_bytes = 0;
	int src_cache_off;
	s64 end_src_offset, end_dst_offset;
	struct scif_window_iter src_win_iter;
	struct scif_window_iter dst_win_iter;
	void *src_virt, *dst_virt;
	struct scif_window *src_window = work->src_window;
	struct scif_window *dst_window = work->dst_window;
	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
	int ret = 0;
	struct dma_async_tx_descriptor *tx;
	struct dma_device *dev = chan->device;
	dma_cookie_t cookie;

	remaining_len = work->len;
	scif_init_window_iter(src_window, &src_win_iter);
	scif_init_window_iter(dst_window, &dst_win_iter);

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	if (src_cache_off != 0) {
		/* Head */
		loop_len = L1_CACHE_BYTES - src_cache_off;
		loop_len = min(loop_len, remaining_len);
		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
		if (src_window->type == SCIF_WINDOW_SELF)
			src_virt = _get_local_va(src_offset, src_window,
						 loop_len);
		else
			src_virt = ioremap_remote(src_offset, src_window,
						  loop_len,
						  work->remote_dev, NULL);
		if (!src_virt)
			return -ENOMEM;
		if (dst_window->type == SCIF_WINDOW_SELF)
			dst_virt = _get_local_va(dst_offset, dst_window,
						 loop_len);
		else
			dst_virt = ioremap_remote(dst_offset, dst_window,
						  loop_len,
						  work->remote_dev, NULL);
		if (!dst_virt) {
			if (src_window->type != SCIF_WINDOW_SELF)
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}
		if (src_window->type == SCIF_WINDOW_SELF)
			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
						remaining_len == loop_len ?
						work->ordered : false);
		else
			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
						  remaining_len == loop_len ?
						  work->ordered : false);
		if (src_window->type != SCIF_WINDOW_SELF)
			iounmap_remote(src_virt, loop_len, work);
		if (dst_window->type != SCIF_WINDOW_SELF)
			iounmap_remote(dst_virt, loop_len, work);
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}

	end_src_offset = src_window->offset +
		(src_window->nr_pages << PAGE_SHIFT);
	end_dst_offset = dst_window->offset +
		(dst_window->nr_pages << PAGE_SHIFT);
	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (src_offset == end_src_offset) {
			src_window = list_next_entry(src_window, list);
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			scif_init_window_iter(src_window, &src_win_iter);
		}
		if (dst_offset == end_dst_offset) {
			dst_window = list_next_entry(dst_window, list);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			scif_init_window_iter(dst_window, &dst_win_iter);
		}

		/* compute dma addresses for transfer */
		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
						    &src_contig_bytes,
						    &src_win_iter);
		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
						    &dst_contig_bytes,
						    &dst_win_iter);
		loop_len = min(src_contig_bytes, dst_contig_bytes);
		loop_len = min(loop_len, remaining_len);
		if (work->ordered && !tail_len &&
		    !(remaining_len - loop_len)) {
			/*
			 * Break up the last chunk of the transfer into two
			 * steps if there is no tail, to guarantee DMA
			 * ordering. Passing SCIF_DMA_POLLING inserts a
			 * status update descriptor in step 1 which acts as
			 * a double sided synchronization fence for the DMA
			 * engine to ensure that the last cache line in
			 * step 2 is updated last.
			 */
			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
							 src_dma_addr,
							 loop_len -
							 L1_CACHE_BYTES,
							 DMA_PREP_FENCE);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			dma_async_issue_pending(chan);
			src_offset += (loop_len - L1_CACHE_BYTES);
			dst_offset += (loop_len - L1_CACHE_BYTES);
			src_dma_addr += (loop_len - L1_CACHE_BYTES);
			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
			remaining_len -= (loop_len - L1_CACHE_BYTES);
			loop_len = remaining_len;

			/* Step 2) DMA: L1_CACHE_BYTES */
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
							 src_dma_addr,
							 loop_len, 0);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			dma_async_issue_pending(chan);
		} else {
			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
							 src_dma_addr,
							 loop_len, 0);
			if (!tx) {
				ret = -ENOMEM;
				goto err;
			}
			cookie = tx->tx_submit(tx);
			if (dma_submit_error(cookie)) {
				ret = -ENOMEM;
				goto err;
			}
			dma_async_issue_pending(chan);
		}
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}
	remaining_len = tail_len;
	if (remaining_len) {
		loop_len = remaining_len;
		if (src_offset == end_src_offset)
			src_window = list_next_entry(src_window, list);
		if (dst_offset == end_dst_offset)
			dst_window = list_next_entry(dst_window, list);

		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
		/*
		 * The CPU copy for the tail bytes must be initiated only once
		 * previous DMA transfers for this endpoint have completed to
		 * guarantee ordering.
		 */
		if (work->ordered) {
			struct scif_dev *rdev = work->remote_dev;

			ret = scif_drain_dma_poll(rdev->sdev, chan);
			if (ret)
				return ret;
		}
		if (src_window->type == SCIF_WINDOW_SELF)
			src_virt = _get_local_va(src_offset, src_window,
						 loop_len);
		else
			src_virt = ioremap_remote(src_offset, src_window,
						  loop_len,
						  work->remote_dev, NULL);
		if (!src_virt)
			return -ENOMEM;

		if (dst_window->type == SCIF_WINDOW_SELF)
			dst_virt = _get_local_va(dst_offset, dst_window,
						 loop_len);
		else
			dst_virt = ioremap_remote(dst_offset, dst_window,
						  loop_len,
						  work->remote_dev, NULL);
		if (!dst_virt) {
			if (src_window->type != SCIF_WINDOW_SELF)
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}

		if (src_window->type == SCIF_WINDOW_SELF)
			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
						work->ordered);
		else
			scif_unaligned_cpy_fromio(dst_virt, src_virt,
						  loop_len, work->ordered);
		if (src_window->type != SCIF_WINDOW_SELF)
			iounmap_remote(src_virt, loop_len, work);

		if (dst_window->type != SCIF_WINDOW_SELF)
			iounmap_remote(dst_virt, loop_len, work);
		remaining_len -= loop_len;
	}
	return ret;
err:
	dev_err(scif_info.mdev.this_device,
		"%s %d Desc Prog Failed ret %d\n",
		__func__, __LINE__, ret);
	return ret;
}

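/*
 * The CPU path below is used for loopback on the management node and
 * whenever the caller passes SCIF_RMA_USECPU; it walks both window
 * lists a page at a time, using memcpy() for loopback and
 * memcpy_toio()/memcpy_fromio() otherwise.
 */
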
/*
 * scif_rma_list_cpu_copy:
 *
 * Traverse all the windows and perform CPU copy.
 */
static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
{
	void *src_virt, *dst_virt;
	size_t loop_len, remaining_len;
	int src_page_off, dst_page_off;
	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
	struct scif_window *src_window = work->src_window;
	struct scif_window *dst_window = work->dst_window;
	s64 end_src_offset, end_dst_offset;
	int ret = 0;
	struct scif_window_iter src_win_iter;
	struct scif_window_iter dst_win_iter;

	remaining_len = work->len;

	scif_init_window_iter(src_window, &src_win_iter);
	scif_init_window_iter(dst_window, &dst_win_iter);
	while (remaining_len) {
		src_page_off = src_offset & ~PAGE_MASK;
		dst_page_off = dst_offset & ~PAGE_MASK;
		loop_len = min(PAGE_SIZE -
			       max(src_page_off, dst_page_off),
			       remaining_len);

		if (src_window->type == SCIF_WINDOW_SELF)
			src_virt = _get_local_va(src_offset, src_window,
						 loop_len);
		else
			src_virt = ioremap_remote(src_offset, src_window,
						  loop_len,
						  work->remote_dev,
						  &src_win_iter);
		if (!src_virt) {
			ret = -ENOMEM;
			goto error;
		}

		if (dst_window->type == SCIF_WINDOW_SELF)
			dst_virt = _get_local_va(dst_offset, dst_window,
						 loop_len);
		else
			dst_virt = ioremap_remote(dst_offset, dst_window,
						  loop_len,
						  work->remote_dev,
						  &dst_win_iter);
		if (!dst_virt) {
			if (src_window->type == SCIF_WINDOW_PEER)
				iounmap_remote(src_virt, loop_len, work);
			ret = -ENOMEM;
			goto error;
		}

		if (work->loopback) {
			memcpy(dst_virt, src_virt, loop_len);
		} else {
			if (src_window->type == SCIF_WINDOW_SELF)
				memcpy_toio((void __iomem __force *)dst_virt,
					    src_virt, loop_len);
			else
				memcpy_fromio(dst_virt,
					      (void __iomem __force *)src_virt,
					      loop_len);
		}
		if (src_window->type == SCIF_WINDOW_PEER)
			iounmap_remote(src_virt, loop_len, work);

		if (dst_window->type == SCIF_WINDOW_PEER)
			iounmap_remote(dst_virt, loop_len, work);

		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
		if (remaining_len) {
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			if (src_offset == end_src_offset) {
				src_window = list_next_entry(src_window, list);
				scif_init_window_iter(src_window,
						      &src_win_iter);
			}
			if (dst_offset == end_dst_offset) {
				dst_window = list_next_entry(dst_window, list);
				scif_init_window_iter(dst_window,
						      &dst_win_iter);
			}
		}
	}
error:
	return ret;
}

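/*
 * Pick a copy strategy: engines that can copy with byte granularity
 * take the aligned path directly; if source and destination share the
 * same cache line skew, the aligned path still works; loopback falls
 * back to CPU copy; otherwise the data is bounced through a cache
 * line aligned temporary buffer by the unaligned path.
 */
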
static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
					  struct scif_copy_work *work,
					  struct dma_chan *chan, off_t loffset)
{
	int src_cache_off, dst_cache_off;
	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
	u8 *temp = NULL;
	bool src_local = true;
	struct scif_dma_comp_cb *comp_cb;
	int err;

	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
		return _scif_rma_list_dma_copy_aligned(work, chan);

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);

	if (dst_cache_off == src_cache_off)
		return scif_rma_list_dma_copy_aligned(work, chan);

	if (work->loopback)
		return scif_rma_list_cpu_copy(work);

	src_local = work->src_window->type == SCIF_WINDOW_SELF;

	/* Allocate dma_completion cb */
	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
	if (!comp_cb)
		goto error;

	work->comp_cb = comp_cb;
	comp_cb->cb_cookie = comp_cb;
	comp_cb->dma_completion_func = &scif_rma_completion_cb;

	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
		comp_cb->is_cache = false;
		/* Allocate padding bytes to align to a cache line */
		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
			       GFP_KERNEL);
		if (!temp)
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
		/* kmalloc(..) does not guarantee cache line alignment */
		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
	} else {
		comp_cb->is_cache = true;
		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
		if (!temp)
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
	}

	if (src_local) {
		temp += dst_cache_off;
		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
					temp, work->len, true);
	} else {
		comp_cb->dst_window = work->dst_window;
		comp_cb->dst_offset = work->dst_offset;
		work->src_offset = work->src_offset - src_cache_off;
		comp_cb->len = work->len;
		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
		comp_cb->header_padding = src_cache_off;
	}
	comp_cb->temp_buf = temp;

	err = scif_map_single(&comp_cb->temp_phys, temp,
			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
	if (err)
		goto free_temp_buf;
	comp_cb->sdev = work->remote_dev;
	if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
		goto free_temp_buf;
	if (!src_local)
		work->fence_type = SCIF_DMA_INTR;
	return 0;
free_temp_buf:
	if (comp_cb->is_cache)
		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
	else
		kfree(comp_cb->temp_buf_to_free);
free_comp_cb:
	kfree(comp_cb);
error:
	return -ENOMEM;
}

/**
 * scif_rma_copy:
 * @epd: end point descriptor.
 * @loffset: offset in local registered address space to/from which to copy
 * @addr: user virtual address to/from which to copy
 * @len: length of range to copy
 * @roffset: offset in remote registered address space to/from which to copy
 * @flags: flags
 * @dir: LOCAL->REMOTE or vice versa.
 * @last_chunk: true if this is the last chunk of a larger transfer
 *
 * Validate parameters, check if src/dst registered ranges requested for copy
 * are valid and initiate either CPU or DMA copy.
 */
static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
			 size_t len, off_t roffset, int flags,
			 enum scif_rma_dir dir, bool last_chunk)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_rma_req remote_req;
	struct scif_rma_req req;
	struct scif_window *local_window = NULL;
	struct scif_window *remote_window = NULL;
	struct scif_copy_work copy_work;
	bool loopback;
	int err = 0;
	struct dma_chan *chan;
	struct scif_mmu_notif *mmn = NULL;
	bool cache = false;
	struct device *spdev;

	err = scif_verify_epd(ep);
	if (err)
		return err;

	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
		return -EINVAL;

	loopback = scifdev_self(ep->remote_dev) ? true : false;
	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
				SCIF_DMA_POLL : 0;
	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);

	/* Use CPU for Mgmt node <-> Mgmt node copies */
	if (loopback && scif_is_mgmt_node()) {
		flags |= SCIF_RMA_USECPU;
		copy_work.fence_type = 0x0;
	}

	cache = scif_is_set_reg_cache(flags);

	remote_req.out_window = &remote_window;
	remote_req.offset = roffset;
	remote_req.nr_bytes = len;
	/*
	 * If transfer is from local to remote then the remote window
	 * must be writeable and vice versa.
	 */
	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
	remote_req.type = SCIF_WINDOW_PARTIAL;
	remote_req.head = &ep->rma_info.remote_reg_list;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	if (addr && cache) {
		mutex_lock(&ep->rma_info.mmn_lock);
		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
		if (!mmn)
			mmn = scif_add_mmu_notifier(current->mm, ep);
		mutex_unlock(&ep->rma_info.mmn_lock);
		if (IS_ERR(mmn)) {
			scif_put_peer_dev(spdev);
			return PTR_ERR(mmn);
		}
		cache = cache && !scif_rma_tc_can_cache(ep, len);
	}
	mutex_lock(&ep->rma_info.rma_lock);
	if (addr) {
		req.out_window = &local_window;
		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
				     PAGE_SIZE);
		req.va_for_temp = addr & PAGE_MASK;
		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
			    VM_READ : VM_WRITE | VM_READ);
		/* Does a valid local window exist? */
		if (mmn) {
			spin_lock(&ep->rma_info.tc_lock);
			req.head = &mmn->tc_reg_list;
			err = scif_query_tcw(ep, &req);
			spin_unlock(&ep->rma_info.tc_lock);
		}
		if (!mmn || err) {
			err = scif_register_temp(epd, req.va_for_temp,
						 req.nr_bytes, req.prot,
						 &loffset, &local_window);
			if (err) {
				mutex_unlock(&ep->rma_info.rma_lock);
				goto error;
			}
			if (!cache)
				goto skip_cache;
			atomic_inc(&ep->rma_info.tcw_refcount);
			atomic_add_return(local_window->nr_pages,
					  &ep->rma_info.tcw_total_pages);
			if (mmn) {
				spin_lock(&ep->rma_info.tc_lock);
				scif_insert_tcw(local_window,
						&mmn->tc_reg_list);
				spin_unlock(&ep->rma_info.tc_lock);
			}
		}
skip_cache:
		loffset = local_window->offset +
				(addr - local_window->va_for_temp);
	} else {
		req.out_window = &local_window;
		req.offset = loffset;
		/*
		 * If transfer is from local to remote then the self window
		 * must be readable and vice versa.
		 */
		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
		req.nr_bytes = len;
		req.type = SCIF_WINDOW_PARTIAL;
		req.head = &ep->rma_info.reg_list;
		/* Does a valid local window exist? */
		err = scif_query_window(&req);
		if (err) {
			mutex_unlock(&ep->rma_info.rma_lock);
			goto error;
		}
	}

	/* Does a valid remote window exist? */
	err = scif_query_window(&remote_req);
	if (err) {
		mutex_unlock(&ep->rma_info.rma_lock);
		goto error;
	}

	/*
	 * Prepare copy_work for submitting work to the DMA kernel thread
	 * or CPU copy routine.
	 */
	copy_work.len = len;
	copy_work.loopback = loopback;
	copy_work.remote_dev = ep->remote_dev;
	if (dir == SCIF_LOCAL_TO_REMOTE) {
		copy_work.src_offset = loffset;
		copy_work.src_window = local_window;
		copy_work.dst_offset = roffset;
		copy_work.dst_window = remote_window;
	} else {
		copy_work.src_offset = roffset;
		copy_work.src_window = remote_window;
		copy_work.dst_offset = loffset;
		copy_work.dst_window = local_window;
	}

	if (flags & SCIF_RMA_USECPU) {
		scif_rma_list_cpu_copy(&copy_work);
	} else {
		chan = ep->rma_info.dma_chan;
		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
						     chan, loffset);
	}
	if (addr && !cache)
		atomic_inc(&ep->rma_info.tw_refcount);

	mutex_unlock(&ep->rma_info.rma_lock);

	if (last_chunk) {
		struct scif_dev *rdev = ep->remote_dev;

		if (copy_work.fence_type == SCIF_DMA_POLL)
			err = scif_drain_dma_poll(rdev->sdev,
						  ep->rma_info.dma_chan);
		else if (copy_work.fence_type == SCIF_DMA_INTR)
			err = scif_drain_dma_intr(rdev->sdev,
						  ep->rma_info.dma_chan);
	}

	if (addr && !cache)
		scif_queue_for_cleanup(local_window, &scif_info.rma);
	scif_put_peer_dev(spdev);
	return err;
error:
	if (err) {
		if (addr && local_window && !cache)
			scif_destroy_window(ep, local_window);
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d len 0x%lx\n",
			__func__, __LINE__, err, len);
	}
	scif_put_peer_dev(spdev);
	return err;
}

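/*
 * Example (sketch): a SCIF client reading 4 KB from offset roffset of
 * the peer's registered space into its own registered window:
 *
 *	err = scif_readfrom(epd, loffset, 0x1000, roffset, SCIF_RMA_SYNC);
 *	if (err < 0)
 *		/+ handle -errno +/;
 *
 * Transfers with unaligned offsets are split into
 * SCIF_MAX_UNALIGNED_BUF_SIZE chunks, with only the last chunk
 * fenced/ordered.
 */
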
int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
		  off_t roffset, int flags)
{
	int err;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
		epd, loffset, len, roffset, flags);
	if (scif_unaligned(loffset, roffset)) {
		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
			err = scif_rma_copy(epd, loffset, 0x0,
					    SCIF_MAX_UNALIGNED_BUF_SIZE,
					    roffset, flags,
					    SCIF_REMOTE_TO_LOCAL, false);
			if (err)
				goto readfrom_err;
			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
		}
	}
	err = scif_rma_copy(epd, loffset, 0x0, len,
			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
readfrom_err:
	return err;
}
EXPORT_SYMBOL_GPL(scif_readfrom);

int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
		 off_t roffset, int flags)
{
	int err;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
		epd, loffset, len, roffset, flags);
	if (scif_unaligned(loffset, roffset)) {
		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
			err = scif_rma_copy(epd, loffset, 0x0,
					    SCIF_MAX_UNALIGNED_BUF_SIZE,
					    roffset, flags,
					    SCIF_LOCAL_TO_REMOTE, false);
			if (err)
				goto writeto_err;
			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
		}
	}
	err = scif_rma_copy(epd, loffset, 0x0, len,
			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
writeto_err:
	return err;
}
EXPORT_SYMBOL_GPL(scif_writeto);

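/*
 * The v-variants below take a virtual address instead of a registered
 * local offset; scif_rma_copy() pins the pages and registers them
 * through scif_register_temp(), optionally caching the temporary
 * window when SCIF_RMA_USECACHE is set. Caching is dropped for
 * transfers larger than SCIF_MAX_UNALIGNED_BUF_SIZE.
 */
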
int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
		   off_t roffset, int flags)
{
	int err;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
		epd, addr, len, roffset, flags);
	if (scif_unaligned((off_t __force)addr, roffset)) {
		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
			flags &= ~SCIF_RMA_USECACHE;

		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
			err = scif_rma_copy(epd, 0, (u64)addr,
					    SCIF_MAX_UNALIGNED_BUF_SIZE,
					    roffset, flags,
					    SCIF_REMOTE_TO_LOCAL, false);
			if (err)
				goto vreadfrom_err;
			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
		}
	}
	err = scif_rma_copy(epd, 0, (u64)addr, len,
			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
vreadfrom_err:
	return err;
}
EXPORT_SYMBOL_GPL(scif_vreadfrom);

int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
		  off_t roffset, int flags)
{
	int err;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
		epd, addr, len, roffset, flags);
	if (scif_unaligned((off_t __force)addr, roffset)) {
		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
			flags &= ~SCIF_RMA_USECACHE;

		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
			err = scif_rma_copy(epd, 0, (u64)addr,
					    SCIF_MAX_UNALIGNED_BUF_SIZE,
					    roffset, flags,
					    SCIF_LOCAL_TO_REMOTE, false);
			if (err)
				goto vwriteto_err;
			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
		}
	}
	err = scif_rma_copy(epd, 0, (u64)addr, len,
			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
vwriteto_err:
	return err;
}
EXPORT_SYMBOL_GPL(scif_vwriteto);