Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.7 1752 lines 47 kB view raw
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Intel MIC Platform Software Stack (MPSS) 4 * 5 * Copyright(c) 2015 Intel Corporation. 6 * 7 * Intel SCIF driver. 8 */ 9#include <linux/intel-iommu.h> 10#include <linux/pagemap.h> 11#include <linux/sched/mm.h> 12#include <linux/sched/signal.h> 13 14#include "scif_main.h" 15#include "scif_map.h" 16 17/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */ 18#define SCIF_MAP_ULIMIT 0x40 19 20bool scif_ulimit_check = 1; 21 22/** 23 * scif_rma_ep_init: 24 * @ep: end point 25 * 26 * Initialize RMA per EP data structures. 27 */ 28void scif_rma_ep_init(struct scif_endpt *ep) 29{ 30 struct scif_endpt_rma_info *rma = &ep->rma_info; 31 32 mutex_init(&rma->rma_lock); 33 init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); 34 spin_lock_init(&rma->tc_lock); 35 mutex_init(&rma->mmn_lock); 36 INIT_LIST_HEAD(&rma->reg_list); 37 INIT_LIST_HEAD(&rma->remote_reg_list); 38 atomic_set(&rma->tw_refcount, 0); 39 atomic_set(&rma->tcw_refcount, 0); 40 atomic_set(&rma->tcw_total_pages, 0); 41 atomic_set(&rma->fence_refcount, 0); 42 43 rma->async_list_del = 0; 44 rma->dma_chan = NULL; 45 INIT_LIST_HEAD(&rma->mmn_list); 46 INIT_LIST_HEAD(&rma->vma_list); 47 init_waitqueue_head(&rma->markwq); 48} 49 50/** 51 * scif_rma_ep_can_uninit: 52 * @ep: end point 53 * 54 * Returns 1 if an endpoint can be uninitialized and 0 otherwise. 55 */ 56int scif_rma_ep_can_uninit(struct scif_endpt *ep) 57{ 58 int ret = 0; 59 60 mutex_lock(&ep->rma_info.rma_lock); 61 /* Destroy RMA Info only if both lists are empty */ 62 if (list_empty(&ep->rma_info.reg_list) && 63 list_empty(&ep->rma_info.remote_reg_list) && 64 list_empty(&ep->rma_info.mmn_list) && 65 !atomic_read(&ep->rma_info.tw_refcount) && 66 !atomic_read(&ep->rma_info.tcw_refcount) && 67 !atomic_read(&ep->rma_info.fence_refcount)) 68 ret = 1; 69 mutex_unlock(&ep->rma_info.rma_lock); 70 return ret; 71} 72 73/** 74 * scif_create_pinned_pages: 75 * @nr_pages: number of pages in window 76 * @prot: read/write protection 77 * 78 * Allocate and prepare a set of pinned pages. 79 */ 80static struct scif_pinned_pages * 81scif_create_pinned_pages(int nr_pages, int prot) 82{ 83 struct scif_pinned_pages *pin; 84 85 might_sleep(); 86 pin = scif_zalloc(sizeof(*pin)); 87 if (!pin) 88 goto error; 89 90 pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages)); 91 if (!pin->pages) 92 goto error_free_pinned_pages; 93 94 pin->prot = prot; 95 pin->magic = SCIFEP_MAGIC; 96 return pin; 97 98error_free_pinned_pages: 99 scif_free(pin, sizeof(*pin)); 100error: 101 return NULL; 102} 103 104/** 105 * scif_destroy_pinned_pages: 106 * @pin: A set of pinned pages. 107 * 108 * Deallocate resources for pinned pages. 109 */ 110static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin) 111{ 112 int j; 113 int writeable = pin->prot & SCIF_PROT_WRITE; 114 int kernel = SCIF_MAP_KERNEL & pin->map_flags; 115 116 for (j = 0; j < pin->nr_pages; j++) { 117 if (pin->pages[j] && !kernel) { 118 if (writeable) 119 SetPageDirty(pin->pages[j]); 120 put_page(pin->pages[j]); 121 } 122 } 123 124 scif_free(pin->pages, 125 pin->nr_pages * sizeof(*pin->pages)); 126 scif_free(pin, sizeof(*pin)); 127 return 0; 128} 129 130/* 131 * scif_create_window: 132 * @ep: end point 133 * @nr_pages: number of pages 134 * @offset: registration offset 135 * @temp: true if a temporary window is being created 136 * 137 * Allocate and prepare a self registration window. 138 */ 139struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, 140 s64 offset, bool temp) 141{ 142 struct scif_window *window; 143 144 might_sleep(); 145 window = scif_zalloc(sizeof(*window)); 146 if (!window) 147 goto error; 148 149 window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); 150 if (!window->dma_addr) 151 goto error_free_window; 152 153 window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages)); 154 if (!window->num_pages) 155 goto error_free_window; 156 157 window->offset = offset; 158 window->ep = (u64)ep; 159 window->magic = SCIFEP_MAGIC; 160 window->reg_state = OP_IDLE; 161 init_waitqueue_head(&window->regwq); 162 window->unreg_state = OP_IDLE; 163 init_waitqueue_head(&window->unregwq); 164 INIT_LIST_HEAD(&window->list); 165 window->type = SCIF_WINDOW_SELF; 166 window->temp = temp; 167 return window; 168 169error_free_window: 170 scif_free(window->dma_addr, 171 nr_pages * sizeof(*window->dma_addr)); 172 scif_free(window, sizeof(*window)); 173error: 174 return NULL; 175} 176 177/** 178 * scif_destroy_incomplete_window: 179 * @ep: end point 180 * @window: registration window 181 * 182 * Deallocate resources for self window. 183 */ 184static void scif_destroy_incomplete_window(struct scif_endpt *ep, 185 struct scif_window *window) 186{ 187 int err; 188 int nr_pages = window->nr_pages; 189 struct scif_allocmsg *alloc = &window->alloc_handle; 190 struct scifmsg msg; 191 192retry: 193 /* Wait for a SCIF_ALLOC_GNT/REJ message */ 194 err = wait_event_timeout(alloc->allocwq, 195 alloc->state != OP_IN_PROGRESS, 196 SCIF_NODE_ALIVE_TIMEOUT); 197 if (!err && scifdev_alive(ep)) 198 goto retry; 199 200 mutex_lock(&ep->rma_info.rma_lock); 201 if (alloc->state == OP_COMPLETED) { 202 msg.uop = SCIF_FREE_VIRT; 203 msg.src = ep->port; 204 msg.payload[0] = ep->remote_ep; 205 msg.payload[1] = window->alloc_handle.vaddr; 206 msg.payload[2] = (u64)window; 207 msg.payload[3] = SCIF_REGISTER; 208 _scif_nodeqp_send(ep->remote_dev, &msg); 209 } 210 mutex_unlock(&ep->rma_info.rma_lock); 211 212 scif_free_window_offset(ep, window, window->offset); 213 scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); 214 scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); 215 scif_free(window, sizeof(*window)); 216} 217 218/** 219 * scif_unmap_window: 220 * @remote_dev: SCIF remote device 221 * @window: registration window 222 * 223 * Delete any DMA mappings created for a registered self window 224 */ 225void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window) 226{ 227 int j; 228 229 if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) { 230 if (window->st) { 231 dma_unmap_sg(&remote_dev->sdev->dev, 232 window->st->sgl, window->st->nents, 233 DMA_BIDIRECTIONAL); 234 sg_free_table(window->st); 235 kfree(window->st); 236 window->st = NULL; 237 } 238 } else { 239 for (j = 0; j < window->nr_contig_chunks; j++) { 240 if (window->dma_addr[j]) { 241 scif_unmap_single(window->dma_addr[j], 242 remote_dev, 243 window->num_pages[j] << 244 PAGE_SHIFT); 245 window->dma_addr[j] = 0x0; 246 } 247 } 248 } 249} 250 251static inline struct mm_struct *__scif_acquire_mm(void) 252{ 253 if (scif_ulimit_check) 254 return get_task_mm(current); 255 return NULL; 256} 257 258static inline void __scif_release_mm(struct mm_struct *mm) 259{ 260 if (mm) 261 mmput(mm); 262} 263 264static inline int 265__scif_dec_pinned_vm_lock(struct mm_struct *mm, 266 int nr_pages) 267{ 268 if (!mm || !nr_pages || !scif_ulimit_check) 269 return 0; 270 271 atomic64_sub(nr_pages, &mm->pinned_vm); 272 return 0; 273} 274 275static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm, 276 int nr_pages) 277{ 278 unsigned long locked, lock_limit; 279 280 if (!mm || !nr_pages || !scif_ulimit_check) 281 return 0; 282 283 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 284 locked = atomic64_add_return(nr_pages, &mm->pinned_vm); 285 286 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 287 atomic64_sub(nr_pages, &mm->pinned_vm); 288 dev_err(scif_info.mdev.this_device, 289 "locked(%lu) > lock_limit(%lu)\n", 290 locked, lock_limit); 291 return -ENOMEM; 292 } 293 return 0; 294} 295 296/** 297 * scif_destroy_window: 298 * @ep: end point 299 * @window: registration window 300 * 301 * Deallocate resources for self window. 302 */ 303int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window) 304{ 305 int j; 306 struct scif_pinned_pages *pinned_pages = window->pinned_pages; 307 int nr_pages = window->nr_pages; 308 309 might_sleep(); 310 if (!window->temp && window->mm) { 311 __scif_dec_pinned_vm_lock(window->mm, window->nr_pages); 312 __scif_release_mm(window->mm); 313 window->mm = NULL; 314 } 315 316 scif_free_window_offset(ep, window, window->offset); 317 scif_unmap_window(ep->remote_dev, window); 318 /* 319 * Decrement references for this set of pinned pages from 320 * this window. 321 */ 322 j = atomic_sub_return(1, &pinned_pages->ref_count); 323 if (j < 0) 324 dev_err(scif_info.mdev.this_device, 325 "%s %d incorrect ref count %d\n", 326 __func__, __LINE__, j); 327 /* 328 * If the ref count for pinned_pages is zero then someone 329 * has already called scif_unpin_pages() for it and we should 330 * destroy the page cache. 331 */ 332 if (!j) 333 scif_destroy_pinned_pages(window->pinned_pages); 334 scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); 335 scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); 336 window->magic = 0; 337 scif_free(window, sizeof(*window)); 338 return 0; 339} 340 341/** 342 * scif_create_remote_lookup: 343 * @remote_dev: SCIF remote device 344 * @window: remote window 345 * 346 * Allocate and prepare lookup entries for the remote 347 * end to copy over the physical addresses. 348 * Returns 0 on success and appropriate errno on failure. 349 */ 350static int scif_create_remote_lookup(struct scif_dev *remote_dev, 351 struct scif_window *window) 352{ 353 int i, j, err = 0; 354 int nr_pages = window->nr_pages; 355 bool vmalloc_dma_phys, vmalloc_num_pages; 356 357 might_sleep(); 358 /* Map window */ 359 err = scif_map_single(&window->mapped_offset, 360 window, remote_dev, sizeof(*window)); 361 if (err) 362 goto error_window; 363 364 /* Compute the number of lookup entries. 21 == 2MB Shift */ 365 window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE, 366 ((2) * 1024 * 1024)) >> 21; 367 368 window->dma_addr_lookup.lookup = 369 scif_alloc_coherent(&window->dma_addr_lookup.offset, 370 remote_dev, window->nr_lookup * 371 sizeof(*window->dma_addr_lookup.lookup), 372 GFP_KERNEL | __GFP_ZERO); 373 if (!window->dma_addr_lookup.lookup) { 374 err = -ENOMEM; 375 goto error_window; 376 } 377 378 window->num_pages_lookup.lookup = 379 scif_alloc_coherent(&window->num_pages_lookup.offset, 380 remote_dev, window->nr_lookup * 381 sizeof(*window->num_pages_lookup.lookup), 382 GFP_KERNEL | __GFP_ZERO); 383 if (!window->num_pages_lookup.lookup) { 384 err = -ENOMEM; 385 goto error_window; 386 } 387 388 vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]); 389 vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]); 390 391 /* Now map each of the pages containing physical addresses */ 392 for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) { 393 err = scif_map_page(&window->dma_addr_lookup.lookup[j], 394 vmalloc_dma_phys ? 395 vmalloc_to_page(&window->dma_addr[i]) : 396 virt_to_page(&window->dma_addr[i]), 397 remote_dev); 398 if (err) 399 goto error_window; 400 err = scif_map_page(&window->num_pages_lookup.lookup[j], 401 vmalloc_num_pages ? 402 vmalloc_to_page(&window->num_pages[i]) : 403 virt_to_page(&window->num_pages[i]), 404 remote_dev); 405 if (err) 406 goto error_window; 407 } 408 return 0; 409error_window: 410 return err; 411} 412 413/** 414 * scif_destroy_remote_lookup: 415 * @remote_dev: SCIF remote device 416 * @window: remote window 417 * 418 * Destroy lookup entries used for the remote 419 * end to copy over the physical addresses. 420 */ 421static void scif_destroy_remote_lookup(struct scif_dev *remote_dev, 422 struct scif_window *window) 423{ 424 int i, j; 425 426 if (window->nr_lookup) { 427 struct scif_rma_lookup *lup = &window->dma_addr_lookup; 428 struct scif_rma_lookup *npup = &window->num_pages_lookup; 429 430 for (i = 0, j = 0; i < window->nr_pages; 431 i += SCIF_NR_ADDR_IN_PAGE, j++) { 432 if (lup->lookup && lup->lookup[j]) 433 scif_unmap_single(lup->lookup[j], 434 remote_dev, 435 PAGE_SIZE); 436 if (npup->lookup && npup->lookup[j]) 437 scif_unmap_single(npup->lookup[j], 438 remote_dev, 439 PAGE_SIZE); 440 } 441 if (lup->lookup) 442 scif_free_coherent(lup->lookup, lup->offset, 443 remote_dev, window->nr_lookup * 444 sizeof(*lup->lookup)); 445 if (npup->lookup) 446 scif_free_coherent(npup->lookup, npup->offset, 447 remote_dev, window->nr_lookup * 448 sizeof(*npup->lookup)); 449 if (window->mapped_offset) 450 scif_unmap_single(window->mapped_offset, 451 remote_dev, sizeof(*window)); 452 window->nr_lookup = 0; 453 } 454} 455 456/** 457 * scif_create_remote_window: 458 * @ep: end point 459 * @nr_pages: number of pages in window 460 * 461 * Allocate and prepare a remote registration window. 462 */ 463static struct scif_window * 464scif_create_remote_window(struct scif_dev *scifdev, int nr_pages) 465{ 466 struct scif_window *window; 467 468 might_sleep(); 469 window = scif_zalloc(sizeof(*window)); 470 if (!window) 471 goto error_ret; 472 473 window->magic = SCIFEP_MAGIC; 474 window->nr_pages = nr_pages; 475 476 window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); 477 if (!window->dma_addr) 478 goto error_window; 479 480 window->num_pages = scif_zalloc(nr_pages * 481 sizeof(*window->num_pages)); 482 if (!window->num_pages) 483 goto error_window; 484 485 if (scif_create_remote_lookup(scifdev, window)) 486 goto error_window; 487 488 window->type = SCIF_WINDOW_PEER; 489 window->unreg_state = OP_IDLE; 490 INIT_LIST_HEAD(&window->list); 491 return window; 492error_window: 493 scif_destroy_remote_window(window); 494error_ret: 495 return NULL; 496} 497 498/** 499 * scif_destroy_remote_window: 500 * @ep: end point 501 * @window: remote registration window 502 * 503 * Deallocate resources for remote window. 504 */ 505void 506scif_destroy_remote_window(struct scif_window *window) 507{ 508 scif_free(window->dma_addr, window->nr_pages * 509 sizeof(*window->dma_addr)); 510 scif_free(window->num_pages, window->nr_pages * 511 sizeof(*window->num_pages)); 512 window->magic = 0; 513 scif_free(window, sizeof(*window)); 514} 515 516/** 517 * scif_iommu_map: create DMA mappings if the IOMMU is enabled 518 * @remote_dev: SCIF remote device 519 * @window: remote registration window 520 * 521 * Map the physical pages using dma_map_sg(..) and then detect the number 522 * of contiguous DMA mappings allocated 523 */ 524static int scif_iommu_map(struct scif_dev *remote_dev, 525 struct scif_window *window) 526{ 527 struct scatterlist *sg; 528 int i, err; 529 scif_pinned_pages_t pin = window->pinned_pages; 530 531 window->st = kzalloc(sizeof(*window->st), GFP_KERNEL); 532 if (!window->st) 533 return -ENOMEM; 534 535 err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL); 536 if (err) 537 return err; 538 539 for_each_sg(window->st->sgl, sg, window->st->nents, i) 540 sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0); 541 542 err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl, 543 window->st->nents, DMA_BIDIRECTIONAL); 544 if (!err) 545 return -ENOMEM; 546 /* Detect contiguous ranges of DMA mappings */ 547 sg = window->st->sgl; 548 for (i = 0; sg; i++) { 549 dma_addr_t last_da; 550 551 window->dma_addr[i] = sg_dma_address(sg); 552 window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT; 553 last_da = sg_dma_address(sg) + sg_dma_len(sg); 554 while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) { 555 window->num_pages[i] += 556 (sg_dma_len(sg) >> PAGE_SHIFT); 557 last_da = window->dma_addr[i] + 558 sg_dma_len(sg); 559 } 560 window->nr_contig_chunks++; 561 } 562 return 0; 563} 564 565/** 566 * scif_map_window: 567 * @remote_dev: SCIF remote device 568 * @window: self registration window 569 * 570 * Map pages of a window into the aperture/PCI. 571 * Also determine addresses required for DMA. 572 */ 573int 574scif_map_window(struct scif_dev *remote_dev, struct scif_window *window) 575{ 576 int i, j, k, err = 0, nr_contig_pages; 577 scif_pinned_pages_t pin; 578 phys_addr_t phys_prev, phys_curr; 579 580 might_sleep(); 581 582 pin = window->pinned_pages; 583 584 if (intel_iommu_enabled && !scifdev_self(remote_dev)) 585 return scif_iommu_map(remote_dev, window); 586 587 for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) { 588 phys_prev = page_to_phys(pin->pages[i]); 589 nr_contig_pages = 1; 590 591 /* Detect physically contiguous chunks */ 592 for (k = i + 1; k < window->nr_pages; k++) { 593 phys_curr = page_to_phys(pin->pages[k]); 594 if (phys_curr != (phys_prev + PAGE_SIZE)) 595 break; 596 phys_prev = phys_curr; 597 nr_contig_pages++; 598 } 599 window->num_pages[j] = nr_contig_pages; 600 window->nr_contig_chunks++; 601 if (scif_is_mgmt_node()) { 602 /* 603 * Management node has to deal with SMPT on X100 and 604 * hence the DMA mapping is required 605 */ 606 err = scif_map_single(&window->dma_addr[j], 607 phys_to_virt(page_to_phys( 608 pin->pages[i])), 609 remote_dev, 610 nr_contig_pages << PAGE_SHIFT); 611 if (err) 612 return err; 613 } else { 614 window->dma_addr[j] = page_to_phys(pin->pages[i]); 615 } 616 } 617 return err; 618} 619 620/** 621 * scif_send_scif_unregister: 622 * @ep: end point 623 * @window: self registration window 624 * 625 * Send a SCIF_UNREGISTER message. 626 */ 627static int scif_send_scif_unregister(struct scif_endpt *ep, 628 struct scif_window *window) 629{ 630 struct scifmsg msg; 631 632 msg.uop = SCIF_UNREGISTER; 633 msg.src = ep->port; 634 msg.payload[0] = window->alloc_handle.vaddr; 635 msg.payload[1] = (u64)window; 636 return scif_nodeqp_send(ep->remote_dev, &msg); 637} 638 639/** 640 * scif_unregister_window: 641 * @window: self registration window 642 * 643 * Send an unregistration request and wait for a response. 644 */ 645int scif_unregister_window(struct scif_window *window) 646{ 647 int err = 0; 648 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 649 bool send_msg = false; 650 651 might_sleep(); 652 switch (window->unreg_state) { 653 case OP_IDLE: 654 { 655 window->unreg_state = OP_IN_PROGRESS; 656 send_msg = true; 657 } 658 /* fall through */ 659 case OP_IN_PROGRESS: 660 { 661 scif_get_window(window, 1); 662 mutex_unlock(&ep->rma_info.rma_lock); 663 if (send_msg) { 664 err = scif_send_scif_unregister(ep, window); 665 if (err) { 666 window->unreg_state = OP_COMPLETED; 667 goto done; 668 } 669 } else { 670 /* Return ENXIO since unregistration is in progress */ 671 mutex_lock(&ep->rma_info.rma_lock); 672 return -ENXIO; 673 } 674retry: 675 /* Wait for a SCIF_UNREGISTER_(N)ACK message */ 676 err = wait_event_timeout(window->unregwq, 677 window->unreg_state != OP_IN_PROGRESS, 678 SCIF_NODE_ALIVE_TIMEOUT); 679 if (!err && scifdev_alive(ep)) 680 goto retry; 681 if (!err) { 682 err = -ENODEV; 683 window->unreg_state = OP_COMPLETED; 684 dev_err(scif_info.mdev.this_device, 685 "%s %d err %d\n", __func__, __LINE__, err); 686 } 687 if (err > 0) 688 err = 0; 689done: 690 mutex_lock(&ep->rma_info.rma_lock); 691 scif_put_window(window, 1); 692 break; 693 } 694 case OP_FAILED: 695 { 696 if (!scifdev_alive(ep)) { 697 err = -ENODEV; 698 window->unreg_state = OP_COMPLETED; 699 } 700 break; 701 } 702 case OP_COMPLETED: 703 break; 704 default: 705 err = -ENODEV; 706 } 707 708 if (window->unreg_state == OP_COMPLETED && window->ref_count) 709 scif_put_window(window, window->nr_pages); 710 711 if (!window->ref_count) { 712 atomic_inc(&ep->rma_info.tw_refcount); 713 list_del_init(&window->list); 714 scif_free_window_offset(ep, window, window->offset); 715 mutex_unlock(&ep->rma_info.rma_lock); 716 if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) && 717 scifdev_alive(ep)) { 718 scif_drain_dma_intr(ep->remote_dev->sdev, 719 ep->rma_info.dma_chan); 720 } else { 721 if (!__scif_dec_pinned_vm_lock(window->mm, 722 window->nr_pages)) { 723 __scif_release_mm(window->mm); 724 window->mm = NULL; 725 } 726 } 727 scif_queue_for_cleanup(window, &scif_info.rma); 728 mutex_lock(&ep->rma_info.rma_lock); 729 } 730 return err; 731} 732 733/** 734 * scif_send_alloc_request: 735 * @ep: end point 736 * @window: self registration window 737 * 738 * Send a remote window allocation request 739 */ 740static int scif_send_alloc_request(struct scif_endpt *ep, 741 struct scif_window *window) 742{ 743 struct scifmsg msg; 744 struct scif_allocmsg *alloc = &window->alloc_handle; 745 746 /* Set up the Alloc Handle */ 747 alloc->state = OP_IN_PROGRESS; 748 init_waitqueue_head(&alloc->allocwq); 749 750 /* Send out an allocation request */ 751 msg.uop = SCIF_ALLOC_REQ; 752 msg.payload[1] = window->nr_pages; 753 msg.payload[2] = (u64)&window->alloc_handle; 754 return _scif_nodeqp_send(ep->remote_dev, &msg); 755} 756 757/** 758 * scif_prep_remote_window: 759 * @ep: end point 760 * @window: self registration window 761 * 762 * Send a remote window allocation request, wait for an allocation response, 763 * and prepares the remote window by copying over the page lists 764 */ 765static int scif_prep_remote_window(struct scif_endpt *ep, 766 struct scif_window *window) 767{ 768 struct scifmsg msg; 769 struct scif_window *remote_window; 770 struct scif_allocmsg *alloc = &window->alloc_handle; 771 dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1; 772 int i = 0, j = 0; 773 int nr_contig_chunks, loop_nr_contig_chunks; 774 int remaining_nr_contig_chunks, nr_lookup; 775 int err, map_err; 776 777 map_err = scif_map_window(ep->remote_dev, window); 778 if (map_err) 779 dev_err(&ep->remote_dev->sdev->dev, 780 "%s %d map_err %d\n", __func__, __LINE__, map_err); 781 remaining_nr_contig_chunks = window->nr_contig_chunks; 782 nr_contig_chunks = window->nr_contig_chunks; 783retry: 784 /* Wait for a SCIF_ALLOC_GNT/REJ message */ 785 err = wait_event_timeout(alloc->allocwq, 786 alloc->state != OP_IN_PROGRESS, 787 SCIF_NODE_ALIVE_TIMEOUT); 788 mutex_lock(&ep->rma_info.rma_lock); 789 /* Synchronize with the thread waking up allocwq */ 790 mutex_unlock(&ep->rma_info.rma_lock); 791 if (!err && scifdev_alive(ep)) 792 goto retry; 793 794 if (!err) 795 err = -ENODEV; 796 797 if (err > 0) 798 err = 0; 799 else 800 return err; 801 802 /* Bail out. The remote end rejected this request */ 803 if (alloc->state == OP_FAILED) 804 return -ENOMEM; 805 806 if (map_err) { 807 dev_err(&ep->remote_dev->sdev->dev, 808 "%s %d err %d\n", __func__, __LINE__, map_err); 809 msg.uop = SCIF_FREE_VIRT; 810 msg.src = ep->port; 811 msg.payload[0] = ep->remote_ep; 812 msg.payload[1] = window->alloc_handle.vaddr; 813 msg.payload[2] = (u64)window; 814 msg.payload[3] = SCIF_REGISTER; 815 spin_lock(&ep->lock); 816 if (ep->state == SCIFEP_CONNECTED) 817 err = _scif_nodeqp_send(ep->remote_dev, &msg); 818 else 819 err = -ENOTCONN; 820 spin_unlock(&ep->lock); 821 return err; 822 } 823 824 remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window), 825 ep->remote_dev); 826 827 /* Compute the number of lookup entries. 21 == 2MB Shift */ 828 nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE) 829 >> ilog2(SCIF_NR_ADDR_IN_PAGE); 830 831 dma_phys_lookup = 832 scif_ioremap(remote_window->dma_addr_lookup.offset, 833 nr_lookup * 834 sizeof(*remote_window->dma_addr_lookup.lookup), 835 ep->remote_dev); 836 num_pages_lookup = 837 scif_ioremap(remote_window->num_pages_lookup.offset, 838 nr_lookup * 839 sizeof(*remote_window->num_pages_lookup.lookup), 840 ep->remote_dev); 841 842 while (remaining_nr_contig_chunks) { 843 loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks, 844 (int)SCIF_NR_ADDR_IN_PAGE); 845 /* #1/2 - Copy physical addresses over to the remote side */ 846 847 /* #2/2 - Copy DMA addresses (addresses that are fed into the 848 * DMA engine) We transfer bus addresses which are then 849 * converted into a MIC physical address on the remote 850 * side if it is a MIC, if the remote node is a mgmt node we 851 * transfer the MIC physical address 852 */ 853 tmp = scif_ioremap(dma_phys_lookup[j], 854 loop_nr_contig_chunks * 855 sizeof(*window->dma_addr), 856 ep->remote_dev); 857 tmp1 = scif_ioremap(num_pages_lookup[j], 858 loop_nr_contig_chunks * 859 sizeof(*window->num_pages), 860 ep->remote_dev); 861 if (scif_is_mgmt_node()) { 862 memcpy_toio((void __force __iomem *)tmp, 863 &window->dma_addr[i], loop_nr_contig_chunks 864 * sizeof(*window->dma_addr)); 865 memcpy_toio((void __force __iomem *)tmp1, 866 &window->num_pages[i], loop_nr_contig_chunks 867 * sizeof(*window->num_pages)); 868 } else { 869 if (scifdev_is_p2p(ep->remote_dev)) { 870 /* 871 * add remote node's base address for this node 872 * to convert it into a MIC address 873 */ 874 int m; 875 dma_addr_t dma_addr; 876 877 for (m = 0; m < loop_nr_contig_chunks; m++) { 878 dma_addr = window->dma_addr[i + m] + 879 ep->remote_dev->base_addr; 880 writeq(dma_addr, 881 (void __force __iomem *)&tmp[m]); 882 } 883 memcpy_toio((void __force __iomem *)tmp1, 884 &window->num_pages[i], 885 loop_nr_contig_chunks 886 * sizeof(*window->num_pages)); 887 } else { 888 /* Mgmt node or loopback - transfer DMA 889 * addresses as is, this is the same as a 890 * MIC physical address (we use the dma_addr 891 * and not the phys_addr array since the 892 * phys_addr is only setup if there is a mmap() 893 * request from the mgmt node) 894 */ 895 memcpy_toio((void __force __iomem *)tmp, 896 &window->dma_addr[i], 897 loop_nr_contig_chunks * 898 sizeof(*window->dma_addr)); 899 memcpy_toio((void __force __iomem *)tmp1, 900 &window->num_pages[i], 901 loop_nr_contig_chunks * 902 sizeof(*window->num_pages)); 903 } 904 } 905 remaining_nr_contig_chunks -= loop_nr_contig_chunks; 906 i += loop_nr_contig_chunks; 907 j++; 908 scif_iounmap(tmp, loop_nr_contig_chunks * 909 sizeof(*window->dma_addr), ep->remote_dev); 910 scif_iounmap(tmp1, loop_nr_contig_chunks * 911 sizeof(*window->num_pages), ep->remote_dev); 912 } 913 914 /* Prepare the remote window for the peer */ 915 remote_window->peer_window = (u64)window; 916 remote_window->offset = window->offset; 917 remote_window->prot = window->prot; 918 remote_window->nr_contig_chunks = nr_contig_chunks; 919 remote_window->ep = ep->remote_ep; 920 scif_iounmap(num_pages_lookup, 921 nr_lookup * 922 sizeof(*remote_window->num_pages_lookup.lookup), 923 ep->remote_dev); 924 scif_iounmap(dma_phys_lookup, 925 nr_lookup * 926 sizeof(*remote_window->dma_addr_lookup.lookup), 927 ep->remote_dev); 928 scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev); 929 window->peer_window = alloc->vaddr; 930 return err; 931} 932 933/** 934 * scif_send_scif_register: 935 * @ep: end point 936 * @window: self registration window 937 * 938 * Send a SCIF_REGISTER message if EP is connected and wait for a 939 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT 940 * message so that the peer can free its remote window allocated earlier. 941 */ 942static int scif_send_scif_register(struct scif_endpt *ep, 943 struct scif_window *window) 944{ 945 int err = 0; 946 struct scifmsg msg; 947 948 msg.src = ep->port; 949 msg.payload[0] = ep->remote_ep; 950 msg.payload[1] = window->alloc_handle.vaddr; 951 msg.payload[2] = (u64)window; 952 spin_lock(&ep->lock); 953 if (ep->state == SCIFEP_CONNECTED) { 954 msg.uop = SCIF_REGISTER; 955 window->reg_state = OP_IN_PROGRESS; 956 err = _scif_nodeqp_send(ep->remote_dev, &msg); 957 spin_unlock(&ep->lock); 958 if (!err) { 959retry: 960 /* Wait for a SCIF_REGISTER_(N)ACK message */ 961 err = wait_event_timeout(window->regwq, 962 window->reg_state != 963 OP_IN_PROGRESS, 964 SCIF_NODE_ALIVE_TIMEOUT); 965 if (!err && scifdev_alive(ep)) 966 goto retry; 967 err = !err ? -ENODEV : 0; 968 if (window->reg_state == OP_FAILED) 969 err = -ENOTCONN; 970 } 971 } else { 972 msg.uop = SCIF_FREE_VIRT; 973 msg.payload[3] = SCIF_REGISTER; 974 err = _scif_nodeqp_send(ep->remote_dev, &msg); 975 spin_unlock(&ep->lock); 976 if (!err) 977 err = -ENOTCONN; 978 } 979 return err; 980} 981 982/** 983 * scif_get_window_offset: 984 * @ep: end point descriptor 985 * @flags: flags 986 * @offset: offset hint 987 * @num_pages: number of pages 988 * @out_offset: computed offset returned by reference. 989 * 990 * Compute/Claim a new offset for this EP. 991 */ 992int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset, 993 int num_pages, s64 *out_offset) 994{ 995 s64 page_index; 996 struct iova *iova_ptr; 997 int err = 0; 998 999 if (flags & SCIF_MAP_FIXED) { 1000 page_index = SCIF_IOVA_PFN(offset); 1001 iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index, 1002 page_index + num_pages - 1); 1003 if (!iova_ptr) 1004 err = -EADDRINUSE; 1005 } else { 1006 iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages, 1007 SCIF_DMA_63BIT_PFN - 1, 0); 1008 if (!iova_ptr) 1009 err = -ENOMEM; 1010 } 1011 if (!err) 1012 *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT; 1013 return err; 1014} 1015 1016/** 1017 * scif_free_window_offset: 1018 * @ep: end point descriptor 1019 * @window: registration window 1020 * @offset: Offset to be freed 1021 * 1022 * Free offset for this EP. The callee is supposed to grab 1023 * the RMA mutex before calling this API. 1024 */ 1025void scif_free_window_offset(struct scif_endpt *ep, 1026 struct scif_window *window, s64 offset) 1027{ 1028 if ((window && !window->offset_freed) || !window) { 1029 free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT); 1030 if (window) 1031 window->offset_freed = true; 1032 } 1033} 1034 1035/** 1036 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message 1037 * @msg: Interrupt message 1038 * 1039 * Remote side is requesting a memory allocation. 1040 */ 1041void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg) 1042{ 1043 int err; 1044 struct scif_window *window = NULL; 1045 int nr_pages = msg->payload[1]; 1046 1047 window = scif_create_remote_window(scifdev, nr_pages); 1048 if (!window) { 1049 err = -ENOMEM; 1050 goto error; 1051 } 1052 1053 /* The peer's allocation request is granted */ 1054 msg->uop = SCIF_ALLOC_GNT; 1055 msg->payload[0] = (u64)window; 1056 msg->payload[1] = window->mapped_offset; 1057 err = scif_nodeqp_send(scifdev, msg); 1058 if (err) 1059 scif_destroy_remote_window(window); 1060 return; 1061error: 1062 /* The peer's allocation request is rejected */ 1063 dev_err(&scifdev->sdev->dev, 1064 "%s %d error %d alloc_ptr %p nr_pages 0x%x\n", 1065 __func__, __LINE__, err, window, nr_pages); 1066 msg->uop = SCIF_ALLOC_REJ; 1067 scif_nodeqp_send(scifdev, msg); 1068} 1069 1070/** 1071 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message 1072 * @msg: Interrupt message 1073 * 1074 * Remote side responded to a memory allocation. 1075 */ 1076void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg) 1077{ 1078 struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2]; 1079 struct scif_window *window = container_of(handle, struct scif_window, 1080 alloc_handle); 1081 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 1082 1083 mutex_lock(&ep->rma_info.rma_lock); 1084 handle->vaddr = msg->payload[0]; 1085 handle->phys_addr = msg->payload[1]; 1086 if (msg->uop == SCIF_ALLOC_GNT) 1087 handle->state = OP_COMPLETED; 1088 else 1089 handle->state = OP_FAILED; 1090 wake_up(&handle->allocwq); 1091 mutex_unlock(&ep->rma_info.rma_lock); 1092} 1093 1094/** 1095 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message 1096 * @msg: Interrupt message 1097 * 1098 * Free up memory kmalloc'd earlier. 1099 */ 1100void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg) 1101{ 1102 struct scif_window *window = (struct scif_window *)msg->payload[1]; 1103 1104 scif_destroy_remote_window(window); 1105} 1106 1107static void 1108scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window) 1109{ 1110 int j; 1111 struct scif_hw_dev *sdev = dev->sdev; 1112 phys_addr_t apt_base = 0; 1113 1114 /* 1115 * Add the aperture base if the DMA address is not card relative 1116 * since the DMA addresses need to be an offset into the bar 1117 */ 1118 if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && 1119 sdev->aper && !sdev->card_rel_da) 1120 apt_base = sdev->aper->pa; 1121 else 1122 return; 1123 1124 for (j = 0; j < window->nr_contig_chunks; j++) { 1125 if (window->num_pages[j]) 1126 window->dma_addr[j] += apt_base; 1127 else 1128 break; 1129 } 1130} 1131 1132/** 1133 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message 1134 * @msg: Interrupt message 1135 * 1136 * Update remote window list with a new registered window. 1137 */ 1138void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg) 1139{ 1140 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; 1141 struct scif_window *window = 1142 (struct scif_window *)msg->payload[1]; 1143 1144 mutex_lock(&ep->rma_info.rma_lock); 1145 spin_lock(&ep->lock); 1146 if (ep->state == SCIFEP_CONNECTED) { 1147 msg->uop = SCIF_REGISTER_ACK; 1148 scif_nodeqp_send(ep->remote_dev, msg); 1149 scif_fixup_aper_base(ep->remote_dev, window); 1150 /* No further failures expected. Insert new window */ 1151 scif_insert_window(window, &ep->rma_info.remote_reg_list); 1152 } else { 1153 msg->uop = SCIF_REGISTER_NACK; 1154 scif_nodeqp_send(ep->remote_dev, msg); 1155 } 1156 spin_unlock(&ep->lock); 1157 mutex_unlock(&ep->rma_info.rma_lock); 1158 /* free up any lookup resources now that page lists are transferred */ 1159 scif_destroy_remote_lookup(ep->remote_dev, window); 1160 /* 1161 * We could not insert the window but we need to 1162 * destroy the window. 1163 */ 1164 if (msg->uop == SCIF_REGISTER_NACK) 1165 scif_destroy_remote_window(window); 1166} 1167 1168/** 1169 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message 1170 * @msg: Interrupt message 1171 * 1172 * Remove window from remote registration list; 1173 */ 1174void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg) 1175{ 1176 struct scif_rma_req req; 1177 struct scif_window *window = NULL; 1178 struct scif_window *recv_window = 1179 (struct scif_window *)msg->payload[0]; 1180 struct scif_endpt *ep; 1181 int del_window = 0; 1182 1183 ep = (struct scif_endpt *)recv_window->ep; 1184 req.out_window = &window; 1185 req.offset = recv_window->offset; 1186 req.prot = 0; 1187 req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; 1188 req.type = SCIF_WINDOW_FULL; 1189 req.head = &ep->rma_info.remote_reg_list; 1190 msg->payload[0] = ep->remote_ep; 1191 1192 mutex_lock(&ep->rma_info.rma_lock); 1193 /* Does a valid window exist? */ 1194 if (scif_query_window(&req)) { 1195 dev_err(&scifdev->sdev->dev, 1196 "%s %d -ENXIO\n", __func__, __LINE__); 1197 msg->uop = SCIF_UNREGISTER_ACK; 1198 goto error; 1199 } 1200 if (window) { 1201 if (window->ref_count) 1202 scif_put_window(window, window->nr_pages); 1203 else 1204 dev_err(&scifdev->sdev->dev, 1205 "%s %d ref count should be +ve\n", 1206 __func__, __LINE__); 1207 window->unreg_state = OP_COMPLETED; 1208 if (!window->ref_count) { 1209 msg->uop = SCIF_UNREGISTER_ACK; 1210 atomic_inc(&ep->rma_info.tw_refcount); 1211 ep->rma_info.async_list_del = 1; 1212 list_del_init(&window->list); 1213 del_window = 1; 1214 } else { 1215 /* NACK! There are valid references to this window */ 1216 msg->uop = SCIF_UNREGISTER_NACK; 1217 } 1218 } else { 1219 /* The window did not make its way to the list at all. ACK */ 1220 msg->uop = SCIF_UNREGISTER_ACK; 1221 scif_destroy_remote_window(recv_window); 1222 } 1223error: 1224 mutex_unlock(&ep->rma_info.rma_lock); 1225 if (del_window) 1226 scif_drain_dma_intr(ep->remote_dev->sdev, 1227 ep->rma_info.dma_chan); 1228 scif_nodeqp_send(ep->remote_dev, msg); 1229 if (del_window) 1230 scif_queue_for_cleanup(window, &scif_info.rma); 1231} 1232 1233/** 1234 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message 1235 * @msg: Interrupt message 1236 * 1237 * Wake up the window waiting to complete registration. 1238 */ 1239void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg) 1240{ 1241 struct scif_window *window = 1242 (struct scif_window *)msg->payload[2]; 1243 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 1244 1245 mutex_lock(&ep->rma_info.rma_lock); 1246 window->reg_state = OP_COMPLETED; 1247 wake_up(&window->regwq); 1248 mutex_unlock(&ep->rma_info.rma_lock); 1249} 1250 1251/** 1252 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message 1253 * @msg: Interrupt message 1254 * 1255 * Wake up the window waiting to inform it that registration 1256 * cannot be completed. 1257 */ 1258void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg) 1259{ 1260 struct scif_window *window = 1261 (struct scif_window *)msg->payload[2]; 1262 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 1263 1264 mutex_lock(&ep->rma_info.rma_lock); 1265 window->reg_state = OP_FAILED; 1266 wake_up(&window->regwq); 1267 mutex_unlock(&ep->rma_info.rma_lock); 1268} 1269 1270/** 1271 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message 1272 * @msg: Interrupt message 1273 * 1274 * Wake up the window waiting to complete unregistration. 1275 */ 1276void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg) 1277{ 1278 struct scif_window *window = 1279 (struct scif_window *)msg->payload[1]; 1280 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 1281 1282 mutex_lock(&ep->rma_info.rma_lock); 1283 window->unreg_state = OP_COMPLETED; 1284 wake_up(&window->unregwq); 1285 mutex_unlock(&ep->rma_info.rma_lock); 1286} 1287 1288/** 1289 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message 1290 * @msg: Interrupt message 1291 * 1292 * Wake up the window waiting to inform it that unregistration 1293 * cannot be completed immediately. 1294 */ 1295void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg) 1296{ 1297 struct scif_window *window = 1298 (struct scif_window *)msg->payload[1]; 1299 struct scif_endpt *ep = (struct scif_endpt *)window->ep; 1300 1301 mutex_lock(&ep->rma_info.rma_lock); 1302 window->unreg_state = OP_FAILED; 1303 wake_up(&window->unregwq); 1304 mutex_unlock(&ep->rma_info.rma_lock); 1305} 1306 1307int __scif_pin_pages(void *addr, size_t len, int *out_prot, 1308 int map_flags, scif_pinned_pages_t *pages) 1309{ 1310 struct scif_pinned_pages *pinned_pages; 1311 int nr_pages, err = 0, i; 1312 bool vmalloc_addr = false; 1313 bool try_upgrade = false; 1314 int prot = *out_prot; 1315 int ulimit = 0; 1316 struct mm_struct *mm = NULL; 1317 1318 /* Unsupported flags */ 1319 if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT)) 1320 return -EINVAL; 1321 ulimit = !!(map_flags & SCIF_MAP_ULIMIT); 1322 1323 /* Unsupported protection requested */ 1324 if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) 1325 return -EINVAL; 1326 1327 /* addr/len must be page aligned. len should be non zero */ 1328 if (!len || 1329 (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || 1330 (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) 1331 return -EINVAL; 1332 1333 might_sleep(); 1334 1335 nr_pages = len >> PAGE_SHIFT; 1336 1337 /* Allocate a set of pinned pages */ 1338 pinned_pages = scif_create_pinned_pages(nr_pages, prot); 1339 if (!pinned_pages) 1340 return -ENOMEM; 1341 1342 if (map_flags & SCIF_MAP_KERNEL) { 1343 if (is_vmalloc_addr(addr)) 1344 vmalloc_addr = true; 1345 1346 for (i = 0; i < nr_pages; i++) { 1347 if (vmalloc_addr) 1348 pinned_pages->pages[i] = 1349 vmalloc_to_page(addr + (i * PAGE_SIZE)); 1350 else 1351 pinned_pages->pages[i] = 1352 virt_to_page(addr + (i * PAGE_SIZE)); 1353 } 1354 pinned_pages->nr_pages = nr_pages; 1355 pinned_pages->map_flags = SCIF_MAP_KERNEL; 1356 } else { 1357 /* 1358 * SCIF supports registration caching. If a registration has 1359 * been requested with read only permissions, then we try 1360 * to pin the pages with RW permissions so that a subsequent 1361 * transfer with RW permission can hit the cache instead of 1362 * invalidating it. If the upgrade fails with RW then we 1363 * revert back to R permission and retry 1364 */ 1365 if (prot == SCIF_PROT_READ) 1366 try_upgrade = true; 1367 prot |= SCIF_PROT_WRITE; 1368retry: 1369 mm = current->mm; 1370 if (ulimit) { 1371 err = __scif_check_inc_pinned_vm(mm, nr_pages); 1372 if (err) { 1373 pinned_pages->nr_pages = 0; 1374 goto error_unmap; 1375 } 1376 } 1377 1378 pinned_pages->nr_pages = get_user_pages_fast( 1379 (u64)addr, 1380 nr_pages, 1381 (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0, 1382 pinned_pages->pages); 1383 if (nr_pages != pinned_pages->nr_pages) { 1384 if (try_upgrade) { 1385 if (ulimit) 1386 __scif_dec_pinned_vm_lock(mm, nr_pages); 1387 /* Roll back any pinned pages */ 1388 for (i = 0; i < pinned_pages->nr_pages; i++) { 1389 if (pinned_pages->pages[i]) 1390 put_page( 1391 pinned_pages->pages[i]); 1392 } 1393 prot &= ~SCIF_PROT_WRITE; 1394 try_upgrade = false; 1395 goto retry; 1396 } 1397 } 1398 pinned_pages->map_flags = 0; 1399 } 1400 1401 if (pinned_pages->nr_pages < nr_pages) { 1402 err = -EFAULT; 1403 pinned_pages->nr_pages = nr_pages; 1404 goto dec_pinned; 1405 } 1406 1407 *out_prot = prot; 1408 atomic_set(&pinned_pages->ref_count, 1); 1409 *pages = pinned_pages; 1410 return err; 1411dec_pinned: 1412 if (ulimit) 1413 __scif_dec_pinned_vm_lock(mm, nr_pages); 1414 /* Something went wrong! Rollback */ 1415error_unmap: 1416 pinned_pages->nr_pages = nr_pages; 1417 scif_destroy_pinned_pages(pinned_pages); 1418 *pages = NULL; 1419 dev_dbg(scif_info.mdev.this_device, 1420 "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); 1421 return err; 1422} 1423 1424int scif_pin_pages(void *addr, size_t len, int prot, 1425 int map_flags, scif_pinned_pages_t *pages) 1426{ 1427 return __scif_pin_pages(addr, len, &prot, map_flags, pages); 1428} 1429EXPORT_SYMBOL_GPL(scif_pin_pages); 1430 1431int scif_unpin_pages(scif_pinned_pages_t pinned_pages) 1432{ 1433 int err = 0, ret; 1434 1435 if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic) 1436 return -EINVAL; 1437 1438 ret = atomic_sub_return(1, &pinned_pages->ref_count); 1439 if (ret < 0) { 1440 dev_err(scif_info.mdev.this_device, 1441 "%s %d scif_unpin_pages called without pinning? rc %d\n", 1442 __func__, __LINE__, ret); 1443 return -EINVAL; 1444 } 1445 /* 1446 * Destroy the window if the ref count for this set of pinned 1447 * pages has dropped to zero. If it is positive then there is 1448 * a valid registered window which is backed by these pages and 1449 * it will be destroyed once all such windows are unregistered. 1450 */ 1451 if (!ret) 1452 err = scif_destroy_pinned_pages(pinned_pages); 1453 1454 return err; 1455} 1456EXPORT_SYMBOL_GPL(scif_unpin_pages); 1457 1458static inline void 1459scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep) 1460{ 1461 mutex_lock(&ep->rma_info.rma_lock); 1462 scif_insert_window(window, &ep->rma_info.reg_list); 1463 mutex_unlock(&ep->rma_info.rma_lock); 1464} 1465 1466off_t scif_register_pinned_pages(scif_epd_t epd, 1467 scif_pinned_pages_t pinned_pages, 1468 off_t offset, int map_flags) 1469{ 1470 struct scif_endpt *ep = (struct scif_endpt *)epd; 1471 s64 computed_offset; 1472 struct scif_window *window; 1473 int err; 1474 size_t len; 1475 struct device *spdev; 1476 1477 /* Unsupported flags */ 1478 if (map_flags & ~SCIF_MAP_FIXED) 1479 return -EINVAL; 1480 1481 len = pinned_pages->nr_pages << PAGE_SHIFT; 1482 1483 /* 1484 * Offset is not page aligned/negative or offset+len 1485 * wraps around with SCIF_MAP_FIXED. 1486 */ 1487 if ((map_flags & SCIF_MAP_FIXED) && 1488 ((ALIGN(offset, PAGE_SIZE) != offset) || 1489 (offset < 0) || 1490 (len > LONG_MAX - offset))) 1491 return -EINVAL; 1492 1493 might_sleep(); 1494 1495 err = scif_verify_epd(ep); 1496 if (err) 1497 return err; 1498 /* 1499 * It is an error to pass pinned_pages to scif_register_pinned_pages() 1500 * after calling scif_unpin_pages(). 1501 */ 1502 if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0)) 1503 return -EINVAL; 1504 1505 /* Compute the offset for this registration */ 1506 err = scif_get_window_offset(ep, map_flags, offset, 1507 len, &computed_offset); 1508 if (err) { 1509 atomic_sub(1, &pinned_pages->ref_count); 1510 return err; 1511 } 1512 1513 /* Allocate and prepare self registration window */ 1514 window = scif_create_window(ep, pinned_pages->nr_pages, 1515 computed_offset, false); 1516 if (!window) { 1517 atomic_sub(1, &pinned_pages->ref_count); 1518 scif_free_window_offset(ep, NULL, computed_offset); 1519 return -ENOMEM; 1520 } 1521 1522 window->pinned_pages = pinned_pages; 1523 window->nr_pages = pinned_pages->nr_pages; 1524 window->prot = pinned_pages->prot; 1525 1526 spdev = scif_get_peer_dev(ep->remote_dev); 1527 if (IS_ERR(spdev)) { 1528 err = PTR_ERR(spdev); 1529 scif_destroy_window(ep, window); 1530 return err; 1531 } 1532 err = scif_send_alloc_request(ep, window); 1533 if (err) { 1534 dev_err(&ep->remote_dev->sdev->dev, 1535 "%s %d err %d\n", __func__, __LINE__, err); 1536 goto error_unmap; 1537 } 1538 1539 /* Prepare the remote registration window */ 1540 err = scif_prep_remote_window(ep, window); 1541 if (err) { 1542 dev_err(&ep->remote_dev->sdev->dev, 1543 "%s %d err %d\n", __func__, __LINE__, err); 1544 goto error_unmap; 1545 } 1546 1547 /* Tell the peer about the new window */ 1548 err = scif_send_scif_register(ep, window); 1549 if (err) { 1550 dev_err(&ep->remote_dev->sdev->dev, 1551 "%s %d err %d\n", __func__, __LINE__, err); 1552 goto error_unmap; 1553 } 1554 1555 scif_put_peer_dev(spdev); 1556 /* No further failures expected. Insert new window */ 1557 scif_insert_local_window(window, ep); 1558 return computed_offset; 1559error_unmap: 1560 scif_destroy_window(ep, window); 1561 scif_put_peer_dev(spdev); 1562 dev_err(&ep->remote_dev->sdev->dev, 1563 "%s %d err %d\n", __func__, __LINE__, err); 1564 return err; 1565} 1566EXPORT_SYMBOL_GPL(scif_register_pinned_pages); 1567 1568off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, 1569 int prot, int map_flags) 1570{ 1571 scif_pinned_pages_t pinned_pages; 1572 off_t err; 1573 struct scif_endpt *ep = (struct scif_endpt *)epd; 1574 s64 computed_offset; 1575 struct scif_window *window; 1576 struct mm_struct *mm = NULL; 1577 struct device *spdev; 1578 1579 dev_dbg(scif_info.mdev.this_device, 1580 "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n", 1581 epd, addr, len, offset, prot, map_flags); 1582 /* Unsupported flags */ 1583 if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL)) 1584 return -EINVAL; 1585 1586 /* 1587 * Offset is not page aligned/negative or offset+len 1588 * wraps around with SCIF_MAP_FIXED. 1589 */ 1590 if ((map_flags & SCIF_MAP_FIXED) && 1591 ((ALIGN(offset, PAGE_SIZE) != offset) || 1592 (offset < 0) || 1593 (len > LONG_MAX - offset))) 1594 return -EINVAL; 1595 1596 /* Unsupported protection requested */ 1597 if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) 1598 return -EINVAL; 1599 1600 /* addr/len must be page aligned. len should be non zero */ 1601 if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || 1602 (ALIGN(len, PAGE_SIZE) != len)) 1603 return -EINVAL; 1604 1605 might_sleep(); 1606 1607 err = scif_verify_epd(ep); 1608 if (err) 1609 return err; 1610 1611 /* Compute the offset for this registration */ 1612 err = scif_get_window_offset(ep, map_flags, offset, 1613 len >> PAGE_SHIFT, &computed_offset); 1614 if (err) 1615 return err; 1616 1617 spdev = scif_get_peer_dev(ep->remote_dev); 1618 if (IS_ERR(spdev)) { 1619 err = PTR_ERR(spdev); 1620 scif_free_window_offset(ep, NULL, computed_offset); 1621 return err; 1622 } 1623 /* Allocate and prepare self registration window */ 1624 window = scif_create_window(ep, len >> PAGE_SHIFT, 1625 computed_offset, false); 1626 if (!window) { 1627 scif_free_window_offset(ep, NULL, computed_offset); 1628 scif_put_peer_dev(spdev); 1629 return -ENOMEM; 1630 } 1631 1632 window->nr_pages = len >> PAGE_SHIFT; 1633 1634 err = scif_send_alloc_request(ep, window); 1635 if (err) { 1636 scif_destroy_incomplete_window(ep, window); 1637 scif_put_peer_dev(spdev); 1638 return err; 1639 } 1640 1641 if (!(map_flags & SCIF_MAP_KERNEL)) { 1642 mm = __scif_acquire_mm(); 1643 map_flags |= SCIF_MAP_ULIMIT; 1644 } 1645 /* Pin down the pages */ 1646 err = __scif_pin_pages(addr, len, &prot, 1647 map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT), 1648 &pinned_pages); 1649 if (err) { 1650 scif_destroy_incomplete_window(ep, window); 1651 __scif_release_mm(mm); 1652 goto error; 1653 } 1654 1655 window->pinned_pages = pinned_pages; 1656 window->prot = pinned_pages->prot; 1657 window->mm = mm; 1658 1659 /* Prepare the remote registration window */ 1660 err = scif_prep_remote_window(ep, window); 1661 if (err) { 1662 dev_err(&ep->remote_dev->sdev->dev, 1663 "%s %d err %ld\n", __func__, __LINE__, err); 1664 goto error_unmap; 1665 } 1666 1667 /* Tell the peer about the new window */ 1668 err = scif_send_scif_register(ep, window); 1669 if (err) { 1670 dev_err(&ep->remote_dev->sdev->dev, 1671 "%s %d err %ld\n", __func__, __LINE__, err); 1672 goto error_unmap; 1673 } 1674 1675 scif_put_peer_dev(spdev); 1676 /* No further failures expected. Insert new window */ 1677 scif_insert_local_window(window, ep); 1678 dev_dbg(&ep->remote_dev->sdev->dev, 1679 "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n", 1680 epd, addr, len, computed_offset); 1681 return computed_offset; 1682error_unmap: 1683 scif_destroy_window(ep, window); 1684error: 1685 scif_put_peer_dev(spdev); 1686 dev_err(&ep->remote_dev->sdev->dev, 1687 "%s %d err %ld\n", __func__, __LINE__, err); 1688 return err; 1689} 1690EXPORT_SYMBOL_GPL(scif_register); 1691 1692int 1693scif_unregister(scif_epd_t epd, off_t offset, size_t len) 1694{ 1695 struct scif_endpt *ep = (struct scif_endpt *)epd; 1696 struct scif_window *window = NULL; 1697 struct scif_rma_req req; 1698 int nr_pages, err; 1699 struct device *spdev; 1700 1701 dev_dbg(scif_info.mdev.this_device, 1702 "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n", 1703 ep, offset, len); 1704 /* len must be page aligned. len should be non zero */ 1705 if (!len || 1706 (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) 1707 return -EINVAL; 1708 1709 /* Offset is not page aligned or offset+len wraps around */ 1710 if ((ALIGN(offset, PAGE_SIZE) != offset) || 1711 (offset < 0) || 1712 (len > LONG_MAX - offset)) 1713 return -EINVAL; 1714 1715 err = scif_verify_epd(ep); 1716 if (err) 1717 return err; 1718 1719 might_sleep(); 1720 nr_pages = len >> PAGE_SHIFT; 1721 1722 req.out_window = &window; 1723 req.offset = offset; 1724 req.prot = 0; 1725 req.nr_bytes = len; 1726 req.type = SCIF_WINDOW_FULL; 1727 req.head = &ep->rma_info.reg_list; 1728 1729 spdev = scif_get_peer_dev(ep->remote_dev); 1730 if (IS_ERR(spdev)) { 1731 err = PTR_ERR(spdev); 1732 return err; 1733 } 1734 mutex_lock(&ep->rma_info.rma_lock); 1735 /* Does a valid window exist? */ 1736 err = scif_query_window(&req); 1737 if (err) { 1738 dev_err(&ep->remote_dev->sdev->dev, 1739 "%s %d err %d\n", __func__, __LINE__, err); 1740 goto error; 1741 } 1742 /* Unregister all the windows in this range */ 1743 err = scif_rma_list_unregister(window, offset, nr_pages); 1744 if (err) 1745 dev_err(&ep->remote_dev->sdev->dev, 1746 "%s %d err %d\n", __func__, __LINE__, err); 1747error: 1748 mutex_unlock(&ep->rma_info.rma_lock); 1749 scif_put_peer_dev(spdev); 1750 return err; 1751} 1752EXPORT_SYMBOL_GPL(scif_unregister);