Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

misc: mic: SCIF remote memory map/unmap interface

This patch implements the SCIF mmap/munmap interface. A similar
capability is provided to kernel clients via the
scif_get_pages()/scif_put_pages() APIs. The SCIF mmap interface
queries the remote registration list to check that a window is valid
and then remaps the local virtual address to the remote physical pages.
These mappings are
subsequently destroyed upon receipt of the VMA close operation or
scif_put_pages(). This functionality allows SCIF users to directly
access remote memory without any driver interaction once the mappings
are created thereby providing bare-metal PCIe latency. These mappings
are zapped to avoid RMA accesses from user space, if a Coprocessor is
reset.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Sudeep Dutt and committed by
Greg Kroah-Hartman
f1a2d865 168ef015

+699
+699
drivers/misc/mic/scif/scif_mmap.c
··· 1 + /* 2 + * Intel MIC Platform Software Stack (MPSS) 3 + * 4 + * Copyright(c) 2015 Intel Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License, version 2, as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + * 15 + * Intel SCIF driver. 16 + * 17 + */ 18 + #include "scif_main.h" 19 + 20 + /* 21 + * struct scif_vma_info - Information about a remote memory mapping 22 + * created via scif_mmap(..) 23 + * @vma: VM area struct 24 + * @list: link to list of active vmas 25 + */ 26 + struct scif_vma_info { 27 + struct vm_area_struct *vma; 28 + struct list_head list; 29 + }; 30 + 31 + void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg) 32 + { 33 + struct scif_rma_req req; 34 + struct scif_window *window = NULL; 35 + struct scif_window *recv_window = 36 + (struct scif_window *)msg->payload[0]; 37 + struct scif_endpt *ep; 38 + 39 + ep = (struct scif_endpt *)recv_window->ep; 40 + req.out_window = &window; 41 + req.offset = recv_window->offset; 42 + req.prot = recv_window->prot; 43 + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; 44 + req.type = SCIF_WINDOW_FULL; 45 + req.head = &ep->rma_info.reg_list; 46 + msg->payload[0] = ep->remote_ep; 47 + 48 + mutex_lock(&ep->rma_info.rma_lock); 49 + /* Does a valid window exist? 
*/ 50 + if (scif_query_window(&req)) { 51 + dev_err(&scifdev->sdev->dev, 52 + "%s %d -ENXIO\n", __func__, __LINE__); 53 + msg->uop = SCIF_UNREGISTER_ACK; 54 + goto error; 55 + } 56 + 57 + scif_put_window(window, window->nr_pages); 58 + 59 + if (!window->ref_count) { 60 + atomic_inc(&ep->rma_info.tw_refcount); 61 + ep->rma_info.async_list_del = 1; 62 + list_del_init(&window->list); 63 + scif_free_window_offset(ep, window, window->offset); 64 + } 65 + error: 66 + mutex_unlock(&ep->rma_info.rma_lock); 67 + if (window && !window->ref_count) 68 + scif_queue_for_cleanup(window, &scif_info.rma); 69 + } 70 + 71 + /* 72 + * Remove valid remote memory mappings created via scif_mmap(..) from the 73 + * process address space since the remote node is lost 74 + */ 75 + static void __scif_zap_mmaps(struct scif_endpt *ep) 76 + { 77 + struct list_head *item; 78 + struct scif_vma_info *info; 79 + struct vm_area_struct *vma; 80 + unsigned long size; 81 + 82 + spin_lock(&ep->lock); 83 + list_for_each(item, &ep->rma_info.vma_list) { 84 + info = list_entry(item, struct scif_vma_info, list); 85 + vma = info->vma; 86 + size = vma->vm_end - vma->vm_start; 87 + zap_vma_ptes(vma, vma->vm_start, size); 88 + dev_dbg(scif_info.mdev.this_device, 89 + "%s ep %p zap vma %p size 0x%lx\n", 90 + __func__, ep, info->vma, size); 91 + } 92 + spin_unlock(&ep->lock); 93 + } 94 + 95 + /* 96 + * Traverse the list of endpoints for a particular remote node and 97 + * zap valid remote memory mappings since the remote node is lost 98 + */ 99 + static void _scif_zap_mmaps(int node, struct list_head *head) 100 + { 101 + struct scif_endpt *ep; 102 + struct list_head *item; 103 + 104 + mutex_lock(&scif_info.connlock); 105 + list_for_each(item, head) { 106 + ep = list_entry(item, struct scif_endpt, list); 107 + if (ep->remote_dev->node == node) 108 + __scif_zap_mmaps(ep); 109 + } 110 + mutex_unlock(&scif_info.connlock); 111 + } 112 + 113 + /* 114 + * Wrapper for removing remote memory mappings for a particular node. 
This API 115 + * is called by peer nodes as part of handling a lost node. 116 + */ 117 + void scif_zap_mmaps(int node) 118 + { 119 + _scif_zap_mmaps(node, &scif_info.connected); 120 + _scif_zap_mmaps(node, &scif_info.disconnected); 121 + } 122 + 123 + /* 124 + * This API is only called while handling a lost node: 125 + * a) Remote node is dead. 126 + * b) Remote memory mappings have been zapped 127 + * So we can traverse the remote_reg_list without any locks. Since 128 + * the window has not yet been unregistered we can drop the ref count 129 + * and queue it to the cleanup thread. 130 + */ 131 + static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep) 132 + { 133 + struct list_head *pos, *tmp; 134 + struct scif_window *window; 135 + 136 + list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) { 137 + window = list_entry(pos, struct scif_window, list); 138 + if (window->ref_count) 139 + scif_put_window(window, window->nr_pages); 140 + else 141 + dev_err(scif_info.mdev.this_device, 142 + "%s %d unexpected\n", 143 + __func__, __LINE__); 144 + if (!window->ref_count) { 145 + atomic_inc(&ep->rma_info.tw_refcount); 146 + list_del_init(&window->list); 147 + scif_queue_for_cleanup(window, &scif_info.rma); 148 + } 149 + } 150 + } 151 + 152 + /* Cleanup remote registration lists for zombie endpoints */ 153 + void scif_cleanup_rma_for_zombies(int node) 154 + { 155 + struct scif_endpt *ep; 156 + struct list_head *item; 157 + 158 + mutex_lock(&scif_info.eplock); 159 + list_for_each(item, &scif_info.zombie) { 160 + ep = list_entry(item, struct scif_endpt, list); 161 + if (ep->remote_dev && ep->remote_dev->node == node) 162 + __scif_cleanup_rma_for_zombies(ep); 163 + } 164 + mutex_unlock(&scif_info.eplock); 165 + flush_work(&scif_info.misc_work); 166 + } 167 + 168 + /* Insert the VMA into the per endpoint VMA list */ 169 + static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma) 170 + { 171 + struct scif_vma_info *info; 172 + int err = 0; 173 + 
174 + info = kzalloc(sizeof(*info), GFP_KERNEL); 175 + if (!info) { 176 + err = -ENOMEM; 177 + goto done; 178 + } 179 + info->vma = vma; 180 + spin_lock(&ep->lock); 181 + list_add_tail(&info->list, &ep->rma_info.vma_list); 182 + spin_unlock(&ep->lock); 183 + done: 184 + return err; 185 + } 186 + 187 + /* Delete the VMA from the per endpoint VMA list */ 188 + static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma) 189 + { 190 + struct list_head *item; 191 + struct scif_vma_info *info; 192 + 193 + spin_lock(&ep->lock); 194 + list_for_each(item, &ep->rma_info.vma_list) { 195 + info = list_entry(item, struct scif_vma_info, list); 196 + if (info->vma == vma) { 197 + list_del(&info->list); 198 + kfree(info); 199 + break; 200 + } 201 + } 202 + spin_unlock(&ep->lock); 203 + } 204 + 205 + static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep) 206 + { 207 + struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev; 208 + struct scif_hw_dev *sdev = scifdev->sdev; 209 + phys_addr_t out_phys, apt_base = 0; 210 + 211 + /* 212 + * If the DMA address is card relative then we need to add the 213 + * aperture base for mmap to work correctly 214 + */ 215 + if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da) 216 + apt_base = sdev->aper->pa; 217 + out_phys = apt_base + phys; 218 + return out_phys; 219 + } 220 + 221 + int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, 222 + struct scif_range **pages) 223 + { 224 + struct scif_endpt *ep = (struct scif_endpt *)epd; 225 + struct scif_rma_req req; 226 + struct scif_window *window = NULL; 227 + int nr_pages, err, i; 228 + 229 + dev_dbg(scif_info.mdev.this_device, 230 + "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n", 231 + ep, offset, len); 232 + err = scif_verify_epd(ep); 233 + if (err) 234 + return err; 235 + 236 + if (!len || (offset < 0) || 237 + (offset + len < offset) || 238 + (ALIGN(offset, PAGE_SIZE) != offset) || 239 + (ALIGN(len, PAGE_SIZE) != len)) 240 + 
return -EINVAL; 241 + 242 + nr_pages = len >> PAGE_SHIFT; 243 + 244 + req.out_window = &window; 245 + req.offset = offset; 246 + req.prot = 0; 247 + req.nr_bytes = len; 248 + req.type = SCIF_WINDOW_SINGLE; 249 + req.head = &ep->rma_info.remote_reg_list; 250 + 251 + mutex_lock(&ep->rma_info.rma_lock); 252 + /* Does a valid window exist? */ 253 + err = scif_query_window(&req); 254 + if (err) { 255 + dev_err(&ep->remote_dev->sdev->dev, 256 + "%s %d err %d\n", __func__, __LINE__, err); 257 + goto error; 258 + } 259 + 260 + /* Allocate scif_range */ 261 + *pages = kzalloc(sizeof(**pages), GFP_KERNEL); 262 + if (!*pages) { 263 + err = -ENOMEM; 264 + goto error; 265 + } 266 + 267 + /* Allocate phys addr array */ 268 + (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)); 269 + if (!((*pages)->phys_addr)) { 270 + err = -ENOMEM; 271 + goto error; 272 + } 273 + 274 + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) { 275 + /* Allocate virtual address array */ 276 + ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *))); 277 + if (!(*pages)->va) { 278 + err = -ENOMEM; 279 + goto error; 280 + } 281 + } 282 + /* Populate the values */ 283 + (*pages)->cookie = window; 284 + (*pages)->nr_pages = nr_pages; 285 + (*pages)->prot_flags = window->prot; 286 + 287 + for (i = 0; i < nr_pages; i++) { 288 + (*pages)->phys_addr[i] = 289 + __scif_off_to_dma_addr(window, offset + 290 + (i * PAGE_SIZE)); 291 + (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i], 292 + ep); 293 + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) 294 + (*pages)->va[i] = 295 + ep->remote_dev->sdev->aper->va + 296 + (*pages)->phys_addr[i] - 297 + ep->remote_dev->sdev->aper->pa; 298 + } 299 + 300 + scif_get_window(window, nr_pages); 301 + error: 302 + mutex_unlock(&ep->rma_info.rma_lock); 303 + if (err) { 304 + if (*pages) { 305 + scif_free((*pages)->phys_addr, 306 + nr_pages * sizeof(dma_addr_t)); 307 + scif_free((*pages)->va, 308 + nr_pages * sizeof(void *)); 309 + 
kfree(*pages); 310 + *pages = NULL; 311 + } 312 + dev_err(&ep->remote_dev->sdev->dev, 313 + "%s %d err %d\n", __func__, __LINE__, err); 314 + } 315 + return err; 316 + } 317 + EXPORT_SYMBOL_GPL(scif_get_pages); 318 + 319 + int scif_put_pages(struct scif_range *pages) 320 + { 321 + struct scif_endpt *ep; 322 + struct scif_window *window; 323 + struct scifmsg msg; 324 + 325 + if (!pages || !pages->cookie) 326 + return -EINVAL; 327 + 328 + window = pages->cookie; 329 + 330 + if (!window || window->magic != SCIFEP_MAGIC) 331 + return -EINVAL; 332 + 333 + ep = (struct scif_endpt *)window->ep; 334 + /* 335 + * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the 336 + * callee should be allowed to release references to the pages, 337 + * else the endpoint was not connected in the first place, 338 + * hence the ENOTCONN. 339 + */ 340 + if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED) 341 + return -ENOTCONN; 342 + 343 + mutex_lock(&ep->rma_info.rma_lock); 344 + 345 + scif_put_window(window, pages->nr_pages); 346 + 347 + /* Initiate window destruction if ref count is zero */ 348 + if (!window->ref_count) { 349 + list_del(&window->list); 350 + mutex_unlock(&ep->rma_info.rma_lock); 351 + scif_drain_dma_intr(ep->remote_dev->sdev, 352 + ep->rma_info.dma_chan); 353 + /* Inform the peer about this window being destroyed. 
*/ 354 + msg.uop = SCIF_MUNMAP; 355 + msg.src = ep->port; 356 + msg.payload[0] = window->peer_window; 357 + /* No error handling for notification messages */ 358 + scif_nodeqp_send(ep->remote_dev, &msg); 359 + /* Destroy this window from the peer's registered AS */ 360 + scif_destroy_remote_window(window); 361 + } else { 362 + mutex_unlock(&ep->rma_info.rma_lock); 363 + } 364 + 365 + scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t)); 366 + scif_free(pages->va, pages->nr_pages * sizeof(void *)); 367 + kfree(pages); 368 + return 0; 369 + } 370 + EXPORT_SYMBOL_GPL(scif_put_pages); 371 + 372 + /* 373 + * scif_rma_list_mmap: 374 + * 375 + * Traverse the remote registration list starting from start_window: 376 + * 1) Create VtoP mappings via remap_pfn_range(..) 377 + * 2) Once step 1) and 2) complete successfully then traverse the range of 378 + * windows again and bump the reference count. 379 + * RMA lock must be held. 380 + */ 381 + static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset, 382 + int nr_pages, struct vm_area_struct *vma) 383 + { 384 + s64 end_offset, loop_offset = offset; 385 + struct scif_window *window = start_window; 386 + int loop_nr_pages, nr_pages_left = nr_pages; 387 + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; 388 + struct list_head *head = &ep->rma_info.remote_reg_list; 389 + int i, err = 0; 390 + dma_addr_t phys_addr; 391 + struct scif_window_iter src_win_iter; 392 + size_t contig_bytes = 0; 393 + 394 + might_sleep(); 395 + list_for_each_entry_from(window, head, list) { 396 + end_offset = window->offset + 397 + (window->nr_pages << PAGE_SHIFT); 398 + loop_nr_pages = min_t(int, 399 + (end_offset - loop_offset) >> PAGE_SHIFT, 400 + nr_pages_left); 401 + scif_init_window_iter(window, &src_win_iter); 402 + for (i = 0; i < loop_nr_pages; i++) { 403 + phys_addr = scif_off_to_dma_addr(window, loop_offset, 404 + &contig_bytes, 405 + &src_win_iter); 406 + phys_addr = scif_get_phys(phys_addr, ep); 
407 + err = remap_pfn_range(vma, 408 + vma->vm_start + 409 + loop_offset - offset, 410 + phys_addr >> PAGE_SHIFT, 411 + PAGE_SIZE, 412 + vma->vm_page_prot); 413 + if (err) 414 + goto error; 415 + loop_offset += PAGE_SIZE; 416 + } 417 + nr_pages_left -= loop_nr_pages; 418 + if (!nr_pages_left) 419 + break; 420 + } 421 + /* 422 + * No more failures expected. Bump up the ref count for all 423 + * the windows. Another traversal from start_window required 424 + * for handling errors encountered across windows during 425 + * remap_pfn_range(..). 426 + */ 427 + loop_offset = offset; 428 + nr_pages_left = nr_pages; 429 + window = start_window; 430 + head = &ep->rma_info.remote_reg_list; 431 + list_for_each_entry_from(window, head, list) { 432 + end_offset = window->offset + 433 + (window->nr_pages << PAGE_SHIFT); 434 + loop_nr_pages = min_t(int, 435 + (end_offset - loop_offset) >> PAGE_SHIFT, 436 + nr_pages_left); 437 + scif_get_window(window, loop_nr_pages); 438 + nr_pages_left -= loop_nr_pages; 439 + loop_offset += (loop_nr_pages << PAGE_SHIFT); 440 + if (!nr_pages_left) 441 + break; 442 + } 443 + error: 444 + if (err) 445 + dev_err(scif_info.mdev.this_device, 446 + "%s %d err %d\n", __func__, __LINE__, err); 447 + return err; 448 + } 449 + 450 + /* 451 + * scif_rma_list_munmap: 452 + * 453 + * Traverse the remote registration list starting from window: 454 + * 1) Decrement ref count. 455 + * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer. 456 + * RMA lock must be held. 
457 + */ 458 + static void scif_rma_list_munmap(struct scif_window *start_window, 459 + s64 offset, int nr_pages) 460 + { 461 + struct scifmsg msg; 462 + s64 loop_offset = offset, end_offset; 463 + int loop_nr_pages, nr_pages_left = nr_pages; 464 + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; 465 + struct list_head *head = &ep->rma_info.remote_reg_list; 466 + struct scif_window *window = start_window, *_window; 467 + 468 + msg.uop = SCIF_MUNMAP; 469 + msg.src = ep->port; 470 + loop_offset = offset; 471 + nr_pages_left = nr_pages; 472 + list_for_each_entry_safe_from(window, _window, head, list) { 473 + end_offset = window->offset + 474 + (window->nr_pages << PAGE_SHIFT); 475 + loop_nr_pages = min_t(int, 476 + (end_offset - loop_offset) >> PAGE_SHIFT, 477 + nr_pages_left); 478 + scif_put_window(window, loop_nr_pages); 479 + if (!window->ref_count) { 480 + struct scif_dev *rdev = ep->remote_dev; 481 + 482 + scif_drain_dma_intr(rdev->sdev, 483 + ep->rma_info.dma_chan); 484 + /* Inform the peer about this munmap */ 485 + msg.payload[0] = window->peer_window; 486 + /* No error handling for Notification messages. 
*/ 487 + scif_nodeqp_send(ep->remote_dev, &msg); 488 + list_del(&window->list); 489 + /* Destroy this window from the peer's registered AS */ 490 + scif_destroy_remote_window(window); 491 + } 492 + nr_pages_left -= loop_nr_pages; 493 + loop_offset += (loop_nr_pages << PAGE_SHIFT); 494 + if (!nr_pages_left) 495 + break; 496 + } 497 + } 498 + 499 + /* 500 + * The private data field of each VMA used to mmap a remote window 501 + * points to an instance of struct vma_pvt 502 + */ 503 + struct vma_pvt { 504 + struct scif_endpt *ep; /* End point for remote window */ 505 + s64 offset; /* offset within remote window */ 506 + bool valid_offset; /* offset is valid only if the original 507 + * mmap request was for a single page 508 + * else the offset within the vma is 509 + * the correct offset 510 + */ 511 + struct kref ref; 512 + }; 513 + 514 + static void vma_pvt_release(struct kref *ref) 515 + { 516 + struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); 517 + 518 + kfree(vmapvt); 519 + } 520 + 521 + /** 522 + * scif_vma_open - VMA open driver callback 523 + * @vma: VMM memory area. 524 + * The open method is called by the kernel to allow the subsystem implementing 525 + * the VMA to initialize the area. This method is invoked any time a new 526 + * reference to the VMA is made (when a process forks, for example). 527 + * The one exception happens when the VMA is first created by mmap; 528 + * in this case, the driver's mmap method is called instead. 529 + * This function is also invoked when an existing VMA is split by the kernel 530 + * due to a call to munmap on a subset of the VMA resulting in two VMAs. 531 + * The kernel invokes this function only on one of the two VMAs. 
532 + */ 533 + static void scif_vma_open(struct vm_area_struct *vma) 534 + { 535 + struct vma_pvt *vmapvt = vma->vm_private_data; 536 + 537 + dev_dbg(scif_info.mdev.this_device, 538 + "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n", 539 + vma->vm_start, vma->vm_end); 540 + scif_insert_vma(vmapvt->ep, vma); 541 + kref_get(&vmapvt->ref); 542 + } 543 + 544 + /** 545 + * scif_munmap - VMA close driver callback. 546 + * @vma: VMM memory area. 547 + * When an area is destroyed, the kernel calls its close operation. 548 + * Note that there's no usage count associated with VMA's; the area 549 + * is opened and closed exactly once by each process that uses it. 550 + */ 551 + static void scif_munmap(struct vm_area_struct *vma) 552 + { 553 + struct scif_endpt *ep; 554 + struct vma_pvt *vmapvt = vma->vm_private_data; 555 + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 556 + s64 offset; 557 + struct scif_rma_req req; 558 + struct scif_window *window = NULL; 559 + int err; 560 + 561 + might_sleep(); 562 + dev_dbg(scif_info.mdev.this_device, 563 + "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", 564 + vma->vm_start, vma->vm_end); 565 + ep = vmapvt->ep; 566 + offset = vmapvt->valid_offset ? 
vmapvt->offset : 567 + (vma->vm_pgoff) << PAGE_SHIFT; 568 + dev_dbg(scif_info.mdev.this_device, 569 + "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n", 570 + ep, nr_pages, offset); 571 + req.out_window = &window; 572 + req.offset = offset; 573 + req.nr_bytes = vma->vm_end - vma->vm_start; 574 + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); 575 + req.type = SCIF_WINDOW_PARTIAL; 576 + req.head = &ep->rma_info.remote_reg_list; 577 + 578 + mutex_lock(&ep->rma_info.rma_lock); 579 + 580 + err = scif_query_window(&req); 581 + if (err) 582 + dev_err(scif_info.mdev.this_device, 583 + "%s %d err %d\n", __func__, __LINE__, err); 584 + else 585 + scif_rma_list_munmap(window, offset, nr_pages); 586 + 587 + mutex_unlock(&ep->rma_info.rma_lock); 588 + /* 589 + * The kernel probably zeroes these out but we still want 590 + * to clean up our own mess just in case. 591 + */ 592 + vma->vm_ops = NULL; 593 + vma->vm_private_data = NULL; 594 + kref_put(&vmapvt->ref, vma_pvt_release); 595 + scif_delete_vma(ep, vma); 596 + } 597 + 598 + static const struct vm_operations_struct scif_vm_ops = { 599 + .open = scif_vma_open, 600 + .close = scif_munmap, 601 + }; 602 + 603 + /** 604 + * scif_mmap - Map pages in virtual address space to a remote window. 605 + * @vma: VMM memory area. 
606 + * @epd: endpoint descriptor 607 + * 608 + * Return: Upon successful completion, scif_mmap() returns zero 609 + * else an apt error is returned as documented in scif.h 610 + */ 611 + int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd) 612 + { 613 + struct scif_rma_req req; 614 + struct scif_window *window = NULL; 615 + struct scif_endpt *ep = (struct scif_endpt *)epd; 616 + s64 start_offset = vma->vm_pgoff << PAGE_SHIFT; 617 + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 618 + int err; 619 + struct vma_pvt *vmapvt; 620 + 621 + dev_dbg(scif_info.mdev.this_device, 622 + "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n", 623 + ep, start_offset, nr_pages); 624 + err = scif_verify_epd(ep); 625 + if (err) 626 + return err; 627 + 628 + might_sleep(); 629 + 630 + err = scif_insert_vma(ep, vma); 631 + if (err) 632 + return err; 633 + 634 + vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL); 635 + if (!vmapvt) { 636 + scif_delete_vma(ep, vma); 637 + return -ENOMEM; 638 + } 639 + 640 + vmapvt->ep = ep; 641 + kref_init(&vmapvt->ref); 642 + 643 + req.out_window = &window; 644 + req.offset = start_offset; 645 + req.nr_bytes = vma->vm_end - vma->vm_start; 646 + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); 647 + req.type = SCIF_WINDOW_PARTIAL; 648 + req.head = &ep->rma_info.remote_reg_list; 649 + 650 + mutex_lock(&ep->rma_info.rma_lock); 651 + /* Does a valid window exist? 
*/ 652 + err = scif_query_window(&req); 653 + if (err) { 654 + dev_err(&ep->remote_dev->sdev->dev, 655 + "%s %d err %d\n", __func__, __LINE__, err); 656 + goto error_unlock; 657 + } 658 + 659 + /* Default prot for loopback */ 660 + if (!scifdev_self(ep->remote_dev)) 661 + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 662 + 663 + /* 664 + * VM_DONTCOPY - Do not copy this vma on fork 665 + * VM_DONTEXPAND - Cannot expand with mremap() 666 + * VM_RESERVED - Count as reserved_vm like IO 667 + * VM_PFNMAP - Page-ranges managed without "struct page" 668 + * VM_IO - Memory mapped I/O or similar 669 + * 670 + * We do not want to copy this VMA automatically on a fork(), 671 + * expand this VMA due to mremap() or swap out these pages since 672 + * the VMA is actually backed by physical pages in the remote 673 + * node's physical memory and not via a struct page. 674 + */ 675 + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; 676 + 677 + if (!scifdev_self(ep->remote_dev)) 678 + vma->vm_flags |= VM_IO | VM_PFNMAP; 679 + 680 + /* Map this range of windows */ 681 + err = scif_rma_list_mmap(window, start_offset, nr_pages, vma); 682 + if (err) { 683 + dev_err(&ep->remote_dev->sdev->dev, 684 + "%s %d err %d\n", __func__, __LINE__, err); 685 + goto error_unlock; 686 + } 687 + /* Set up the driver call back */ 688 + vma->vm_ops = &scif_vm_ops; 689 + vma->vm_private_data = vmapvt; 690 + error_unlock: 691 + mutex_unlock(&ep->rma_info.rma_lock); 692 + if (err) { 693 + kfree(vmapvt); 694 + dev_err(&ep->remote_dev->sdev->dev, 695 + "%s %d err %d\n", __func__, __LINE__, err); 696 + scif_delete_vma(ep, vma); 697 + } 698 + return err; 699 + }