at v4.3 45 kB view raw
1/* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * Copyright(c) 2014 Intel Corporation. 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of version 2 of the GNU General Public License as 13 * published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope that it will be useful, but 16 * WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * BSD LICENSE 21 * 22 * Copyright(c) 2014 Intel Corporation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 28 * * Redistributions of source code must retain the above copyright 29 * notice, this list of conditions and the following disclaimer. 30 * * Redistributions in binary form must reproduce the above copyright 31 * notice, this list of conditions and the following disclaimer in 32 * the documentation and/or other materials provided with the 33 * distribution. 34 * * Neither the name of Intel Corporation nor the names of its 35 * contributors may be used to endorse or promote products derived 36 * from this software without specific prior written permission. 37 * 38 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 39 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 40 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 41 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 42 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 45 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 46 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 47 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 48 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 49 * 50 * Intel SCIF driver. 51 * 52 */ 53#ifndef __SCIF_H__ 54#define __SCIF_H__ 55 56#include <linux/types.h> 57#include <linux/poll.h> 58#include <linux/scif_ioctl.h> 59 60#define SCIF_ACCEPT_SYNC 1 61#define SCIF_SEND_BLOCK 1 62#define SCIF_RECV_BLOCK 1 63 64enum { 65 SCIF_PROT_READ = (1 << 0), 66 SCIF_PROT_WRITE = (1 << 1) 67}; 68 69enum { 70 SCIF_MAP_FIXED = 0x10, 71 SCIF_MAP_KERNEL = 0x20, 72}; 73 74enum { 75 SCIF_FENCE_INIT_SELF = (1 << 0), 76 SCIF_FENCE_INIT_PEER = (1 << 1), 77 SCIF_SIGNAL_LOCAL = (1 << 4), 78 SCIF_SIGNAL_REMOTE = (1 << 5) 79}; 80 81enum { 82 SCIF_RMA_USECPU = (1 << 0), 83 SCIF_RMA_USECACHE = (1 << 1), 84 SCIF_RMA_SYNC = (1 << 2), 85 SCIF_RMA_ORDERED = (1 << 3) 86}; 87 88/* End of SCIF Admin Reserved Ports */ 89#define SCIF_ADMIN_PORT_END 1024 90 91/* End of SCIF Reserved Ports */ 92#define SCIF_PORT_RSVD 1088 93 94typedef struct scif_endpt *scif_epd_t; 95 96#define SCIF_OPEN_FAILED ((scif_epd_t)-1) 97#define SCIF_REGISTER_FAILED ((off_t)-1) 98#define SCIF_MMAP_FAILED ((void *)-1) 99 100/** 101 * scif_open() - Create an endpoint 102 * 103 * Return: 104 * Upon successful completion, scif_open() returns an endpoint descriptor to 105 * be used in subsequent SCIF functions calls to refer to that endpoint; 106 * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is 107 * returned and errno is set to indicate the error; in kernel mode a NULL 108 * scif_epd_t is returned. 
109 * 110 * Errors: 111 * ENOMEM - Insufficient kernel memory was available 112 */ 113scif_epd_t scif_open(void); 114 115/** 116 * scif_bind() - Bind an endpoint to a port 117 * @epd: endpoint descriptor 118 * @pn: port number 119 * 120 * scif_bind() binds endpoint epd to port pn, where pn is a port number on the 121 * local node. If pn is zero, a port number greater than or equal to 122 * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to 123 * exactly one local port. Ports less than 1024 when requested can only be bound 124 * by system (or root) processes or by processes executed by privileged users. 125 * 126 * Return: 127 * Upon successful completion, scif_bind() returns the port number to which epd 128 * is bound; otherwise in user mode -1 is returned and errno is set to 129 * indicate the error; in kernel mode the negative of one of the following 130 * errors is returned. 131 * 132 * Errors: 133 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 134 * EINVAL - the endpoint or the port is already bound 135 * EISCONN - The endpoint is already connected 136 * ENOSPC - No port number available for assignment 137 * EACCES - The port requested is protected and the user is not the superuser 138 */ 139int scif_bind(scif_epd_t epd, u16 pn); 140 141/** 142 * scif_listen() - Listen for connections on an endpoint 143 * @epd: endpoint descriptor 144 * @backlog: maximum pending connection requests 145 * 146 * scif_listen() marks the endpoint epd as a listening endpoint - that is, as 147 * an endpoint that will be used to accept incoming connection requests. Once 148 * so marked, the endpoint is said to be in the listening state and may not be 149 * used as the endpoint of a connection. 150 * 151 * The endpoint, epd, must have been bound to a port. 152 * 153 * The backlog argument defines the maximum length to which the queue of 154 * pending connections for epd may grow. 
If a connection request arrives when 155 * the queue is full, the client may receive an error with an indication that 156 * the connection was refused. 157 * 158 * Return: 159 * Upon successful completion, scif_listen() returns 0; otherwise in user mode 160 * -1 is returned and errno is set to indicate the error; in kernel mode the 161 * negative of one of the following errors is returned. 162 * 163 * Errors: 164 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 165 * EINVAL - the endpoint is not bound to a port 166 * EISCONN - The endpoint is already connected or listening 167 */ 168int scif_listen(scif_epd_t epd, int backlog); 169 170/** 171 * scif_connect() - Initiate a connection on a port 172 * @epd: endpoint descriptor 173 * @dst: global id of port to which to connect 174 * 175 * The scif_connect() function requests the connection of endpoint epd to remote 176 * port dst. If the connection is successful, a peer endpoint, bound to dst, is 177 * created on node dst.node. On successful return, the connection is complete. 178 * 179 * If the endpoint epd has not already been bound to a port, scif_connect() 180 * will bind it to an unused local port. 181 * 182 * A connection is terminated when an endpoint of the connection is closed, 183 * either explicitly by scif_close(), or when a process that owns one of the 184 * endpoints of the connection is terminated. 185 * 186 * In user space, scif_connect() supports an asynchronous connection mode 187 * if the application has set the O_NONBLOCK flag on the endpoint via the 188 * fcntl() system call. Setting this flag will result in the calling process 189 * not to wait during scif_connect(). 190 * 191 * Return: 192 * Upon successful completion, scif_connect() returns the port ID to which the 193 * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is 194 * set to indicate the error; in kernel mode the negative of one of the 195 * following errors is returned. 
196 * 197 * Errors: 198 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 199 * ECONNREFUSED - The destination was not listening for connections or refused 200 * the connection request 201 * EINVAL - dst.port is not a valid port ID 202 * EISCONN - The endpoint is already connected 203 * ENOMEM - No buffer space is available 204 * ENODEV - The destination node does not exist, or the node is lost or existed, 205 * but is not currently in the network since it may have crashed 206 * ENOSPC - No port number available for assignment 207 * EOPNOTSUPP - The endpoint is listening and cannot be connected 208 */ 209int scif_connect(scif_epd_t epd, struct scif_port_id *dst); 210 211/** 212 * scif_accept() - Accept a connection on an endpoint 213 * @epd: endpoint descriptor 214 * @peer: global id of port to which connected 215 * @newepd: new connected endpoint descriptor 216 * @flags: flags 217 * 218 * The scif_accept() call extracts the first connection request from the queue 219 * of pending connections for the port on which epd is listening. scif_accept() 220 * creates a new endpoint, bound to the same port as epd, and allocates a new 221 * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new 222 * endpoint is connected to the endpoint through which the connection was 223 * requested. epd is unaffected by this call, and remains in the listening 224 * state. 225 * 226 * On successful return, peer holds the global port identifier (node id and 227 * local port number) of the port which requested the connection. 228 * 229 * A connection is terminated when an endpoint of the connection is closed, 230 * either explicitly by scif_close(), or when a process that owns one of the 231 * endpoints of the connection is terminated. 232 * 233 * The number of connections that can (subsequently) be accepted on epd is only 234 * limited by system resources (memory). 
235 * 236 * The flags argument is formed by OR'ing together zero or more of the 237 * following values. 238 * SCIF_ACCEPT_SYNC - block until a connection request is presented. If 239 * SCIF_ACCEPT_SYNC is not in flags, and no pending 240 * connections are present on the queue, scif_accept() 241 * fails with an EAGAIN error 242 * 243 * In user mode, the select() and poll() functions can be used to determine 244 * when there is a connection request. In kernel mode, the scif_poll() 245 * function may be used for this purpose. A readable event will be delivered 246 * when a connection is requested. 247 * 248 * Return: 249 * Upon successful completion, scif_accept() returns 0; otherwise in user mode 250 * -1 is returned and errno is set to indicate the error; in kernel mode the 251 * negative of one of the following errors is returned. 252 * 253 * Errors: 254 * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be 255 * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete 256 * its connection request 257 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 258 * EINTR - Interrupted function 259 * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is 260 * NULL, or newepd is NULL 261 * ENODEV - The requesting node is lost or existed, but is not currently in the 262 * network since it may have crashed 263 * ENOMEM - Not enough space 264 * ENOENT - Secondary part of epd registration failed 265 */ 266int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t 267 *newepd, int flags); 268 269/** 270 * scif_close() - Close an endpoint 271 * @epd: endpoint descriptor 272 * 273 * scif_close() closes an endpoint and performs necessary teardown of 274 * facilities associated with that endpoint. 275 * 276 * If epd is a listening endpoint then it will no longer accept connection 277 * requests on the port to which it is bound. Any pending connection requests 278 * are rejected. 
279 * 280 * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs 281 * which are in-process through epd or its peer endpoint will complete before 282 * scif_close() returns. Registered windows of the local and peer endpoints are 283 * released as if scif_unregister() was called against each window. 284 * 285 * Closing a SCIF endpoint does not affect local registered memory mapped by 286 * a SCIF endpoint on a remote node. The local memory remains mapped by the peer 287 * SCIF endpoint until explicitly removed by calling munmap(..) by the peer. 288 * 289 * If the peer endpoint's receive queue is not empty at the time that epd is 290 * closed, then the peer endpoint can be passed as the endpoint parameter to 291 * scif_recv() until the receive queue is empty. 292 * 293 * epd is freed and may no longer be accessed. 294 * 295 * Return: 296 * Upon successful completion, scif_close() returns 0; otherwise in user mode 297 * -1 is returned and errno is set to indicate the error; in kernel mode the 298 * negative of one of the following errors is returned. 299 * 300 * Errors: 301 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 302 */ 303int scif_close(scif_epd_t epd); 304 305/** 306 * scif_send() - Send a message 307 * @epd: endpoint descriptor 308 * @msg: message buffer address 309 * @len: message length 310 * @flags: blocking mode flags 311 * 312 * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data 313 * are copied from memory starting at address msg. On successful execution the 314 * return value of scif_send() is the number of bytes that were sent, and is 315 * zero if no bytes were sent because len was zero. scif_send() may be called 316 * only when the endpoint is in a connected state. 317 * 318 * If a scif_send() call is non-blocking, then it sends only those bytes which 319 * can be sent without waiting, up to a maximum of len bytes. 
320 * 321 * If a scif_send() call is blocking, then it normally returns after sending 322 * all len bytes. If a blocking call is interrupted or the connection is 323 * reset, the call is considered successful if some bytes were sent or len is 324 * zero, otherwise the call is considered unsuccessful. 325 * 326 * In user mode, the select() and poll() functions can be used to determine 327 * when the send queue is not full. In kernel mode, the scif_poll() function 328 * may be used for this purpose. 329 * 330 * It is recommended that scif_send()/scif_recv() only be used for short 331 * control-type message communication between SCIF endpoints. The SCIF RMA 332 * APIs are expected to provide better performance for transfer sizes of 333 * 1024 bytes or longer for the current MIC hardware and software 334 * implementation. 335 * 336 * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK 337 * is passed as the flags argument. 338 * 339 * Return: 340 * Upon successful completion, scif_send() returns the number of bytes sent; 341 * otherwise in user mode -1 is returned and errno is set to indicate the 342 * error; in kernel mode the negative of one of the following errors is 343 * returned. 344 * 345 * Errors: 346 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 347 * ECONNRESET - Connection reset by peer 348 * EFAULT - An invalid address was specified for a parameter 349 * EINVAL - flags is invalid, or len is negative 350 * ENODEV - The remote node is lost or existed, but is not currently in the 351 * network since it may have crashed 352 * ENOMEM - Not enough space 353 * ENOTCONN - The endpoint is not connected 354 */ 355int scif_send(scif_epd_t epd, void *msg, int len, int flags); 356 357/** 358 * scif_recv() - Receive a message 359 * @epd: endpoint descriptor 360 * @msg: message buffer address 361 * @len: message buffer length 362 * @flags: blocking mode flags 363 * 364 * scif_recv() receives data from the peer of endpoint epd. 
Up to len bytes of 365 * data are copied to memory starting at address msg. On successful execution 366 * the return value of scif_recv() is the number of bytes that were received, 367 * and is zero if no bytes were received because len was zero. scif_recv() may 368 * be called only when the endpoint is in a connected state. 369 * 370 * If a scif_recv() call is non-blocking, then it receives only those bytes 371 * which can be received without waiting, up to a maximum of len bytes. 372 * 373 * If a scif_recv() call is blocking, then it normally returns after receiving 374 * all len bytes. If the blocking call was interrupted due to a disconnection, 375 * subsequent calls to scif_recv() will copy all bytes received up to the point 376 * of disconnection. 377 * 378 * In user mode, the select() and poll() functions can be used to determine 379 * when data is available to be received. In kernel mode, the scif_poll() 380 * function may be used for this purpose. 381 * 382 * It is recommended that scif_send()/scif_recv() only be used for short 383 * control-type message communication between SCIF endpoints. The SCIF RMA 384 * APIs are expected to provide better performance for transfer sizes of 385 * 1024 bytes or longer for the current MIC hardware and software 386 * implementation. 387 * 388 * scif_recv() will block until the entire message is received if 389 * SCIF_RECV_BLOCK is passed as the flags argument. 390 * 391 * Return: 392 * Upon successful completion, scif_recv() returns the number of bytes 393 * received; otherwise in user mode -1 is returned and errno is set to 394 * indicate the error; in kernel mode the negative of one of the following 395 * errors is returned. 
396 * 397 * Errors: 398 * EAGAIN - The destination node is returning from a low power state 399 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 400 * ECONNRESET - Connection reset by peer 401 * EFAULT - An invalid address was specified for a parameter 402 * EINVAL - flags is invalid, or len is negative 403 * ENODEV - The remote node is lost or existed, but is not currently in the 404 * network since it may have crashed 405 * ENOMEM - Not enough space 406 * ENOTCONN - The endpoint is not connected 407 */ 408int scif_recv(scif_epd_t epd, void *msg, int len, int flags); 409 410/** 411 * scif_register() - Mark a memory region for remote access. 412 * @epd: endpoint descriptor 413 * @addr: starting virtual address 414 * @len: length of range 415 * @offset: offset of window 416 * @prot_flags: read/write protection flags 417 * @map_flags: mapping flags 418 * 419 * The scif_register() function opens a window, a range of whole pages of the 420 * registered address space of the endpoint epd, starting at offset po and 421 * continuing for len bytes. The value of po, further described below, is a 422 * function of the parameters offset and len, and the value of map_flags. Each 423 * page of the window represents the physical memory page which backs the 424 * corresponding page of the range of virtual address pages starting at addr 425 * and continuing for len bytes. addr and len are constrained to be multiples 426 * of the page size. A successful scif_register() call returns po. 427 * 428 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset 429 * exactly, and offset is constrained to be a multiple of the page size. The 430 * mapping established by scif_register() will not replace any existing 431 * registration; an error is returned if any page within the range [offset, 432 * offset + len - 1] intersects an existing window. 
433 * 434 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an 435 * implementation-defined manner to arrive at po. The po value so chosen will 436 * be an area of the registered address space that the implementation deems 437 * suitable for a mapping of len bytes. An offset value of 0 is interpreted as 438 * granting the implementation complete freedom in selecting po, subject to 439 * constraints described below. A non-zero value of offset is taken to be a 440 * suggestion of an offset near which the mapping should be placed. When the 441 * implementation selects a value for po, it does not replace any extant 442 * window. In all cases, po will be a multiple of the page size. 443 * 444 * The physical pages which are so represented by a window are available for 445 * access in calls to mmap(), scif_readfrom(), scif_writeto(), 446 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the 447 * physical pages represented by the window will not be reused by the memory 448 * subsystem for any other purpose. Note that the same physical page may be 449 * represented by multiple windows. 450 * 451 * Subsequent operations which change the memory pages to which virtual 452 * addresses are mapped (such as mmap(), munmap()) have no effect on 453 * existing window. 454 * 455 * If the process will fork(), it is recommended that the registered 456 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent 457 * problems due to copy-on-write semantics. 458 * 459 * The prot_flags argument is formed by OR'ing together one or more of the 460 * following values. 461 * SCIF_PROT_READ - allow read operations from the window 462 * SCIF_PROT_WRITE - allow write operations to the window 463 * 464 * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a 465 * fixed offset. 
466 * 467 * Return: 468 * Upon successful completion, scif_register() returns the offset at which the 469 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that 470 * is (off_t)-1) is returned and errno is set to indicate the error; in 471 * kernel mode the negative of one of the following errors is returned. 472 * 473 * Errors: 474 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range 475 * [offset, offset + len - 1] are already registered 476 * EAGAIN - The mapping could not be performed due to lack of resources 477 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 478 * ECONNRESET - Connection reset by peer 479 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 480 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is 481 * set in map_flags, and offset is not a multiple of the page size, or addr is not a 482 * multiple of the page size, or len is not a multiple of the page size, or is 483 * 0, or offset is negative 484 * ENODEV - The remote node is lost or existed, but is not currently in the 485 * network since it may have crashed 486 * ENOMEM - Not enough space 487 * ENOTCONN - The endpoint is not connected 488 */ 489off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, 490 int prot_flags, int map_flags); 491 492/** 493 * scif_unregister() - Unregister a memory region previously marked for remote access. 494 * @epd: endpoint descriptor 495 * @offset: start of range to unregister 496 * @len: length of range to unregister 497 * 498 * The scif_unregister() function closes those previously registered windows 499 * which are entirely within the range [offset, offset + len - 1]. It is an 500 * error to specify a range which intersects only a subrange of a window. 
501 * 502 * On a successful return, pages within the window may no longer be specified 503 * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), 504 * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, 505 * however, continues to exist until all previous references against it are 506 * removed. A window is referenced if there is a mapping to it created by 507 * mmap(), or if scif_get_pages() was called against the window 508 * (and the pages have not been returned via scif_put_pages()). A window is 509 * also referenced while an RMA, in which some range of the window is a source 510 * or destination, is in progress. Finally a window is referenced while some 511 * offset in that window was specified to scif_fence_signal(), and the RMAs 512 * marked by that call to scif_fence_signal() have not completed. While a 513 * window is in this state, its registered address space pages are not 514 * available for use in a new registered window. 515 * 516 * When all such references to the window have been removed, its references to 517 * all the physical pages which it represents are removed. Similarly, the 518 * registered address space pages of the window become available for 519 * registration in a new window. 520 * 521 * Return: 522 * Upon successful completion, scif_unregister() returns 0; otherwise in user 523 * mode -1 is returned and errno is set to indicate the error; in kernel mode 524 * the negative of one of the following errors is returned. In the event of an 525 * error, no windows are unregistered. 
526 * 527 * Errors: 528 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 529 * ECONNRESET - Connection reset by peer 530 * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a 531 * window, or offset is negative 532 * ENODEV - The remote node is lost or existed, but is not currently in the 533 * network since it may have crashed 534 * ENOTCONN - The endpoint is not connected 535 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the 536 * registered address space of epd 537 */ 538int scif_unregister(scif_epd_t epd, off_t offset, size_t len); 539 540/** 541 * scif_readfrom() - Copy from a remote address space 542 * @epd: endpoint descriptor 543 * @loffset: offset in local registered address space to 544 * which to copy 545 * @len: length of range to copy 546 * @roffset: offset in remote registered address space 547 * from which to copy 548 * @rma_flags: transfer mode flags 549 * 550 * scif_readfrom() copies len bytes from the remote registered address space of 551 * the peer of endpoint epd, starting at the offset roffset to the local 552 * registered address space of epd, starting at the offset loffset. 553 * 554 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, 555 * roffset + len - 1] must be within some registered window or windows of the 556 * local and remote nodes. A range may intersect multiple registered windows, 557 * but only if those windows are contiguous in the registered address space. 558 * 559 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 560 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 561 * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the 562 * transfer is complete. Otherwise, the transfer may be performed asynchron- 563 * ously. The order in which any two asynchronous RMA operations complete 564 * is non-deterministic. 
The synchronization functions, scif_fence_mark()/ 565 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 566 * the completion of asynchronous RMA operations on the same endpoint. 567 * 568 * The DMA transfer of individual bytes is not guaranteed to complete in 569 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 570 * cacheline or partial cacheline of the source range will become visible on 571 * the destination node after all other transferred data in the source 572 * range has become visible on the destination node. 573 * 574 * The optimal DMA performance will likely be realized if both 575 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower 576 * performance will likely be realized if loffset and roffset are not 577 * cacheline aligned but are separated by some multiple of 64. The lowest level 578 * of performance is likely if loffset and roffset are not separated by a 579 * multiple of 64. 580 * 581 * The rma_flags argument is formed by ORing together zero or more of the 582 * following values. 583 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 584 * engine. 585 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 586 * transfer has completed. Passing this flag results in the 587 * current implementation busy waiting and consuming CPU cycles 588 * while the DMA transfer is in progress for best performance by 589 * avoiding the interrupt latency. 590 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 591 * the source range becomes visible on the destination node 592 * after all other transferred data in the source range has 593 * become visible on the destination 594 * 595 * Return: 596 * Upon successful completion, scif_readfrom() returns 0; otherwise in user 597 * mode -1 is returned and errno is set to indicate the error; in kernel mode 598 * the negative of one of the following errors is returned. 
599 * 600 * Errors: 601 * EACCES - Attempt to write to a read-only range 602 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 603 * ECONNRESET - Connection reset by peer 604 * EINVAL - rma_flags is invalid 605 * ENODEV - The remote node is lost or existed, but is not currently in the 606 * network since it may have crashed 607 * ENOTCONN - The endpoint is not connected 608 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered 609 * address space of epd, or, The range [roffset, roffset + len - 1] is invalid 610 * for the registered address space of the peer of epd, or loffset or roffset 611 * is negative 612 */ 613int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t 614 roffset, int rma_flags); 615 616/** 617 * scif_writeto() - Copy to a remote address space 618 * @epd: endpoint descriptor 619 * @loffset: offset in local registered address space 620 * from which to copy 621 * @len: length of range to copy 622 * @roffset: offset in remote registered address space to 623 * which to copy 624 * @rma_flags: transfer mode flags 625 * 626 * scif_writeto() copies len bytes from the local registered address space of 627 * epd, starting at the offset loffset to the remote registered address space 628 * of the peer of endpoint epd, starting at the offset roffset. 629 * 630 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, 631 * roffset + len - 1] must be within some registered window or windows of the 632 * local and remote nodes. A range may intersect multiple registered windows, 633 * but only if those windows are contiguous in the registered address space. 634 * 635 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 636 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 637 * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the 638 * transfer is complete. Otherwise, the transfer may be performed asynchron- 639 * ously. 
The order in which any two asynchronous RMA operations complete 640 * is non-deterministic. The synchronization functions, scif_fence_mark()/ 641 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 642 * the completion of asynchronous RMA operations on the same endpoint. 643 * 644 * The DMA transfer of individual bytes is not guaranteed to complete in 645 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 646 * cacheline or partial cacheline of the source range will become visible on 647 * the destination node after all other transferred data in the source 648 * range has become visible on the destination node. 649 * 650 * The optimal DMA performance will likely be realized if both 651 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower 652 * performance will likely be realized if loffset and roffset are not cacheline 653 * aligned but are separated by some multiple of 64. The lowest level of 654 * performance is likely if loffset and roffset are not separated by a multiple 655 * of 64. 656 * 657 * The rma_flags argument is formed by ORing together zero or more of the 658 * following values. 659 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 660 * engine. 661 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 662 * transfer has completed. Passing this flag results in the 663 * current implementation busy waiting and consuming CPU cycles 664 * while the DMA transfer is in progress for best performance by 665 * avoiding the interrupt latency. 
666 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 667 * the source range becomes visible on the destination node 668 * after all other transferred data in the source range has 669 * become visible on the destination 670 * 671 * Return: 672 * Upon successful completion, scif_writeto() returns 0; otherwise in user 673 * mode -1 is returned and errno is set to indicate the error; in kernel mode 674 * the negative of one of the following errors is returned. 675 * 676 * Errors: 677 * EACCES - Attempt to write to a read-only range 678 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 679 * ECONNRESET - Connection reset by peer 680 * EINVAL - rma_flags is invalid 681 * ENODEV - The remote node is lost or existed, but is not currently in the 682 * network since it may have crashed 683 * ENOTCONN - The endpoint is not connected 684 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered 685 * address space of epd, or, The range [roffset, roffset + len - 1] is invalid 686 * for the registered address space of the peer of epd, or loffset or roffset 687 * is negative 688 */ 689int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t 690 roffset, int rma_flags); 691 692/** 693 * scif_vreadfrom() - Copy from a remote address space 694 * @epd: endpoint descriptor 695 * @addr: address to which to copy 696 * @len: length of range to copy 697 * @roffset: offset in remote registered address space 698 * from which to copy 699 * @rma_flags: transfer mode flags 700 * 701 * scif_vreadfrom() copies len bytes from the remote registered address 702 * space of the peer of endpoint epd, starting at the offset roffset, to local 703 * memory, starting at addr. 704 * 705 * The specified range [roffset, roffset + len - 1] must be within some 706 * registered window or windows of the remote nodes. 
The range may 707 * intersect multiple registered windows, but only if those windows are 708 * contiguous in the registered address space. 709 * 710 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 711 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 712 * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the 713 * transfer is complete. Otherwise, the transfer may be performed asynchron- 714 * ously. The order in which any two asynchronous RMA operations complete 715 * is non-deterministic. The synchronization functions, scif_fence_mark()/ 716 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 717 * the completion of asynchronous RMA operations on the same endpoint. 718 * 719 * The DMA transfer of individual bytes is not guaranteed to complete in 720 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 721 * cacheline or partial cacheline of the source range will become visible on 722 * the destination node after all other transferred data in the source 723 * range has become visible on the destination node. 724 * 725 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back 726 * the specified local memory range may remain in a pinned state even after 727 * the specified transfer completes. This may reduce overhead if some or all of 728 * the same virtual address range is referenced in a subsequent call of 729 * scif_vreadfrom() or scif_vwriteto(). 730 * 731 * The optimal DMA performance will likely be realized if both 732 * addr and roffset are cacheline aligned (are a multiple of 64). Lower 733 * performance will likely be realized if addr and roffset are not 734 * cacheline aligned but are separated by some multiple of 64. The lowest level 735 * of performance is likely if addr and roffset are not separated by a 736 * multiple of 64.
737 * 738 * The rma_flags argument is formed by ORing together zero or more of the 739 * following values. 740 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 741 * engine. 742 * SCIF_RMA_USECACHE - enable registration caching 743 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 744 * transfer has completed. Passing this flag results in the 745 * current implementation busy waiting and consuming CPU cycles 746 * while the DMA transfer is in progress for best performance by 747 * avoiding the interrupt latency. 748 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 749 * the source range becomes visible on the destination node 750 * after all other transferred data in the source range has 751 * become visible on the destination 752 * 753 * Return: 754 * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user 755 * mode -1 is returned and errno is set to indicate the error; in kernel mode 756 * the negative of one of the following errors is returned. 
757 * 758 * Errors: 759 * EACCES - Attempt to write to a read-only range 760 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 761 * ECONNRESET - Connection reset by peer 762 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 763 * EINVAL - rma_flags is invalid 764 * ENODEV - The remote node is lost or existed, but is not currently in the 765 * network since it may have crashed 766 * ENOTCONN - The endpoint is not connected 767 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the 768 * registered address space of the peer of epd 769 */ 770int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, 771 int rma_flags); 772 773/** 774 * scif_vwriteto() - Copy to a remote address space 775 * @epd: endpoint descriptor 776 * @addr: address from which to copy 777 * @len: length of range to copy 778 * @roffset: offset in remote registered address space to 779 * which to copy 780 * @rma_flags: transfer mode flags 781 * 782 * scif_vwriteto() copies len bytes from the local memory, starting at addr, to 783 * the remote registered address space of the peer of endpoint epd, starting at 784 * the offset roffset. 785 * 786 * The specified range [roffset, roffset + len - 1] must be within some 787 * registered window or windows of the remote nodes. The range may intersect 788 * multiple registered windows, but only if those windows are contiguous in the 789 * registered address space. 790 * 791 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 792 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 793 * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the 794 * transfer is complete. Otherwise, the transfer may be performed asynchron- 795 * ously. The order in which any two asynchronous RMA operations complete 796 * is non-deterministic.
The synchronization functions, scif_fence_mark()/ 797 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 798 * the completion of asynchronous RMA operations on the same endpoint. 799 * 800 * The DMA transfer of individual bytes is not guaranteed to complete in 801 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 802 * cacheline or partial cacheline of the source range will become visible on 803 * the destination node after all other transferred data in the source 804 * range has become visible on the destination node. 805 * 806 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back 807 * the specified local memory range may remain in a pinned state even after 808 * the specified transfer completes. This may reduce overhead if some or all of 809 * the same virtual address range is referenced in a subsequent call of 810 * scif_vreadfrom() or scif_vwriteto(). 811 * 812 * The optimal DMA performance will likely be realized if both 813 * addr and roffset are cacheline aligned (are a multiple of 64). Lower 814 * performance will likely be realized if addr and roffset are not cacheline 815 * aligned but are separated by some multiple of 64. The lowest level of 816 * performance is likely if addr and roffset are not separated by a multiple of 817 * 64. 818 * 819 * The rma_flags argument is formed by ORing together zero or more of the 820 * following values. 821 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 822 * engine. 823 * SCIF_RMA_USECACHE - allow registration caching 824 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 825 * transfer has completed. Passing this flag results in the 826 * current implementation busy waiting and consuming CPU cycles 827 * while the DMA transfer is in progress for best performance by 828 * avoiding the interrupt latency.
829 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 830 * the source range becomes visible on the destination node 831 * after all other transferred data in the source range has 832 * become visible on the destination 833 * 834 * Return: 835 * Upon successful completion, scif_vwriteto() returns 0; otherwise in user 836 * mode -1 is returned and errno is set to indicate the error; in kernel mode 837 * the negative of one of the following errors is returned. 838 * 839 * Errors: 840 * EACCES - Attempt to write to a read-only range 841 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 842 * ECONNRESET - Connection reset by peer 843 * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 844 * EINVAL - rma_flags is invalid 845 * ENODEV - The remote node is lost or existed, but is not currently in the 846 * network since it may have crashed 847 * ENOTCONN - The endpoint is not connected 848 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the 849 * registered address space of the peer of epd 850 */ 851int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, 852 int rma_flags); 853 854/** 855 * scif_fence_mark() - Mark previously issued RMAs 856 * @epd: endpoint descriptor 857 * @flags: control flags 858 * @mark: marked value returned as output. 859 * 860 * scif_fence_mark() returns after marking the current set of all uncompleted 861 * RMAs initiated through the endpoint epd or the current set of all 862 * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are 863 * marked with a value returned at mark. The application may subsequently call 864 * scif_fence_wait(), passing the value returned at mark, to await completion 865 * of all RMAs so marked. 866 * 867 * The flags argument has exactly one of the following values.
868 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint 869 * epd are marked 870 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer 871 * of endpoint epd are marked 872 * 873 * Return: 874 * Upon successful completion, scif_fence_mark() returns 0; otherwise in user 875 * mode -1 is returned and errno is set to indicate the error; in kernel mode 876 * the negative of one of the following errors is returned. 877 * 878 * Errors: 879 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 880 * ECONNRESET - Connection reset by peer 881 * EINVAL - flags is invalid 882 * ENODEV - The remote node is lost or existed, but is not currently in the 883 * network since it may have crashed 884 * ENOTCONN - The endpoint is not connected 885 * ENOMEM - Insufficient kernel memory was available 886 */ 887int scif_fence_mark(scif_epd_t epd, int flags, int *mark); 888 889/** 890 * scif_fence_wait() - Wait for completion of marked RMAs 891 * @epd: endpoint descriptor 892 * @mark: mark request 893 * 894 * scif_fence_wait() returns after all RMAs marked with mark have completed. 895 * The value passed in mark must have been obtained in a previous call to 896 * scif_fence_mark(). 897 * 898 * Return: 899 * Upon successful completion, scif_fence_wait() returns 0; otherwise in user 900 * mode -1 is returned and errno is set to indicate the error; in kernel mode 901 * the negative of one of the following errors is returned. 
902 * 903 * Errors: 904 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 905 * ECONNRESET - Connection reset by peer 906 * ENODEV - The remote node is lost or existed, but is not currently in the 907 * network since it may have crashed 908 * ENOTCONN - The endpoint is not connected 909 * ENOMEM - Insufficient kernel memory was available 910 */ 911int scif_fence_wait(scif_epd_t epd, int mark); 912 913/** 914 * scif_fence_signal() - Request a memory update on completion of RMAs 915 * @epd: endpoint descriptor 916 * @loff: local offset 917 * @lval: local value to write to loff 918 * @roff: remote offset 919 * @rval: remote value to write to roff 920 * @flags: flags 921 * 922 * scif_fence_signal() returns after marking the current set of all uncompleted 923 * RMAs initiated through the endpoint epd or marking the current set of all 924 * uncompleted RMAs initiated through the peer of endpoint epd. 925 * 926 * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the 927 * marked set, lval is written to memory at the address corresponding to offset 928 * loff in the local registered address space of epd. loff must be within a 929 * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion 930 * of the RMAs in the marked set, rval is written to memory at the address 931 * corresponding to offset roff in the remote registered address space of epd. 932 * roff must be within a remote registered window of the peer of epd. Note 933 * that any specified offset must be DWORD (4 byte / 32 bit) aligned. 934 * 935 * The flags argument is formed by OR'ing together the following. 936 * Exactly one of the following values. 937 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint 938 * epd are marked 939 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer 940 * of endpoint epd are marked 941 * One or more of the following values.
942 * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to 943 * memory at the address corresponding to offset loff in the local 944 * registered address space of epd. 945 * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to 946 * memory at the address corresponding to offset roff in the remote 947 * registered address space of epd. 948 * 949 * Return: 950 * Upon successful completion, scif_fence_signal() returns 0; otherwise in 951 * user mode -1 is returned and errno is set to indicate the error; in kernel 952 * mode the negative of one of the following errors is returned. 953 * 954 * Errors: 955 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 956 * ECONNRESET - Connection reset by peer 957 * EINVAL - flags is invalid, or loff or roff are not DWORD aligned 958 * ENODEV - The remote node is lost or existed, but is not currently in the 959 * network since it may have crashed 960 * ENOTCONN - The endpoint is not connected 961 * ENXIO - loff is invalid for the registered address space of epd, or roff is 962 * invalid for the registered address space of the peer of epd 963 */ 964int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, 965 u64 rval, int flags); 966 967/** 968 * scif_get_node_ids() - Return information about online nodes 969 * @nodes: array in which to return online node IDs 970 * @len: number of entries in the nodes array 971 * @self: address to place the node ID of the local node 972 * 973 * scif_get_node_ids() fills in the nodes array with up to len node IDs of the 974 * nodes in the SCIF network. If there is not enough space in nodes, as 975 * indicated by the len parameter, only len node IDs are returned in nodes. The 976 * return value of scif_get_node_ids() is the total number of nodes currently in 977 * the SCIF network. By checking the return value against the len parameter, 978 * the user may determine if enough space for nodes was allocated.
979 * 980 * The node ID of the local node is returned at self. 981 * 982 * Return: 983 * Upon successful completion, scif_get_node_ids() returns the actual number of 984 * online nodes in the SCIF network including 'self'; otherwise in user mode 985 * -1 is returned and errno is set to indicate the error; in kernel mode no 986 * errors are returned. 987 * 988 * Errors: 989 * EFAULT - Bad address 990 */ 991int scif_get_node_ids(u16 *nodes, int len, u16 *self); 992 993#endif /* __SCIF_H__ */