Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

misc: mic: SCIF RMA header file and IOCTL changes

This patch updates the SCIF header file and IOCTL interface with the
changes required to support RMAs. APIs added include the ability to
pin pages and register those pages with SCIF. SCIF kernel clients can
also add references to remote registered pages and access them via the
CPU. The user space IOCTL interface has been updated to enable SCIF
registration, RDMA/CPU copies and fence APIs for RDMA synchronization.

Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Sudeep Dutt and committed by
Greg Kroah-Hartman
a44f2630 f4a66c20

+310 -11
+225 -11
include/linux/scif.h
··· 93 93 #define SCIF_PORT_RSVD 1088 94 94 95 95 typedef struct scif_endpt *scif_epd_t; 96 + typedef struct scif_pinned_pages *scif_pinned_pages_t; 97 + 98 + /** 99 + * struct scif_range - SCIF registered range used in kernel mode 100 + * @cookie: cookie used internally by SCIF 101 + * @nr_pages: number of pages of PAGE_SIZE 102 + * @prot_flags: R/W protection 103 + * @phys_addr: Array of bus addresses 104 + * @va: Array of kernel virtual addresses backed by the pages in the phys_addr 105 + * array. The va is populated only when called on the host for a remote 106 + * SCIF connection on MIC. This is required to support the use case of DMA 107 + * between MIC and another device which is not a SCIF node e.g., an IB or 108 + * ethernet NIC. 109 + */ 110 + struct scif_range { 111 + void *cookie; 112 + int nr_pages; 113 + int prot_flags; 114 + dma_addr_t *phys_addr; 115 + void __iomem **va; 116 + }; 96 117 97 118 /** 98 119 * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll ··· 410 389 * Errors: 411 390 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 412 391 * ECONNRESET - Connection reset by peer 413 - * EFAULT - An invalid address was specified for a parameter 414 392 * EINVAL - flags is invalid, or len is negative 415 393 * ENODEV - The remote node is lost or existed, but is not currently in the 416 394 * network since it may have crashed ··· 462 442 * EAGAIN - The destination node is returning from a low power state 463 443 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 464 444 * ECONNRESET - Connection reset by peer 465 - * EFAULT - An invalid address was specified for a parameter 466 445 * EINVAL - flags is invalid, or len is negative 467 446 * ENODEV - The remote node is lost or existed, but is not currently in the 468 447 * network since it may have crashed ··· 524 505 * SCIF_PROT_READ - allow read operations from the window 525 506 * SCIF_PROT_WRITE - allow write operations to the window 526 507 * 527 - * The map_flags 
argument can be set to SCIF_MAP_FIXED which interprets a 528 - * fixed offset. 529 - * 530 508 * Return: 531 509 * Upon successful completion, scif_register() returns the offset at which the 532 510 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that ··· 536 520 * EAGAIN - The mapping could not be performed due to lack of resources 537 521 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 538 522 * ECONNRESET - Connection reset by peer 539 - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 540 523 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is 541 524 * set in flags, and offset is not a multiple of the page size, or addr is not a 542 525 * multiple of the page size, or len is not a multiple of the page size, or is ··· 818 803 * EACCES - Attempt to write to a read-only range 819 804 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 820 805 * ECONNRESET - Connection reset by peer 821 - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 822 806 * EINVAL - rma_flags is invalid 823 807 * ENODEV - The remote node is lost or existed, but is not currently in the 824 808 * network since it may have crashed ··· 898 884 * EACCES - Attempt to write to a read-only range 899 885 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 900 886 * ECONNRESET - Connection reset by peer 901 - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid 902 887 * EINVAL - rma_flags is invalid 903 888 * ENODEV - The remote node is lost or existed, but is not currently in the 904 889 * network since it may have crashed ··· 1041 1028 * online nodes in the SCIF network including 'self'; otherwise in user mode 1042 1029 * -1 is returned and errno is set to indicate the error; in kernel mode no 1043 1030 * errors are returned. 
1044 - * 1045 - * Errors: 1046 - * EFAULT - Bad address 1047 1031 */ 1048 1032 int scif_get_node_ids(u16 *nodes, int len, u16 *self); 1033 + 1034 + /** 1035 + * scif_pin_pages() - Pin a set of pages 1036 + * @addr: Virtual address of range to pin 1037 + * @len: Length of range to pin 1038 + * @prot_flags: Page protection flags 1039 + * @map_flags: Page classification flags 1040 + * @pinned_pages: Handle to pinned pages 1041 + * 1042 + * scif_pin_pages() pins (locks in physical memory) the physical pages which 1043 + * back the range of virtual address pages starting at addr and continuing for 1044 + * len bytes. addr and len are constrained to be multiples of the page size. A 1045 + * successful scif_pin_pages() call returns a handle to pinned_pages which may 1046 + * be used in subsequent calls to scif_register_pinned_pages(). 1047 + * 1048 + * The pages will remain pinned as long as there is a reference against the 1049 + * scif_pinned_pages_t value returned by scif_pin_pages() and until 1050 + * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A 1051 + * reference is added to a scif_pinned_pages_t value each time a window is 1052 + * created by calling scif_register_pinned_pages() and passing the 1053 + * scif_pinned_pages_t value. A reference is removed from a 1054 + * scif_pinned_pages_t value each time such a window is deleted. 1055 + * 1056 + * Subsequent operations which change the memory pages to which virtual 1057 + * addresses are mapped (such as mmap(), munmap()) have no effect on the 1058 + * scif_pinned_pages_t value or windows created against it. 1059 + * 1060 + * If the process will fork(), it is recommended that the registered 1061 + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent 1062 + * problems due to copy-on-write semantics. 1063 + * 1064 + * The prot_flags argument is formed by OR'ing together one or more of the 1065 + * following values. 
1066 + * SCIF_PROT_READ - allow read operations against the pages 1067 + * SCIF_PROT_WRITE - allow write operations against the pages 1068 + * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a 1069 + * kernel space address. By default, addr is interpreted as a user space 1070 + * address. 1071 + * 1072 + * Return: 1073 + * Upon successful completion, scif_pin_pages() returns 0; otherwise the 1074 + * negative of one of the following errors is returned. 1075 + * 1076 + * Errors: 1077 + * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative 1078 + * ENOMEM - Not enough space 1079 + */ 1080 + int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, 1081 + scif_pinned_pages_t *pinned_pages); 1082 + 1083 + /** 1084 + * scif_unpin_pages() - Unpin a set of pages 1085 + * @pinned_pages: Handle to pinned pages to be unpinned 1086 + * 1087 + * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new 1088 + * windows against pinned_pages. The physical pages represented by pinned_pages 1089 + * will remain pinned until all windows previously registered against 1090 + * pinned_pages are deleted (the window is scif_unregister()'d and all 1091 + * references to the window are removed (see scif_unregister())). 1092 + * 1093 + * pinned_pages must have been obtained from a previous call to scif_pin_pages(). 1094 + * After calling scif_unpin_pages(), it is an error to pass pinned_pages to 1095 + * scif_register_pinned_pages(). 1096 + * 1097 + * Return: 1098 + * Upon successful completion, scif_unpin_pages() returns 0; otherwise the 1099 + * negative of one of the following errors is returned. 1100 + * 1101 + * Errors: 1102 + * EINVAL - pinned_pages is not valid 1103 + */ 1104 + int scif_unpin_pages(scif_pinned_pages_t pinned_pages); 1105 + 1106 + /** 1107 + * scif_register_pinned_pages() - Mark a memory region for remote access. 
1108 + * @epd: endpoint descriptor 1109 + * @pinned_pages: Handle to pinned pages 1110 + * @offset: Registered address space offset 1111 + * @map_flags: Flags which control where pages are mapped 1112 + * 1113 + * The scif_register_pinned_pages() function opens a window, a range of whole 1114 + * pages of the registered address space of the endpoint epd, starting at 1115 + * offset po. The value of po, further described below, is a function of the 1116 + * parameters offset and pinned_pages, and the value of map_flags. Each page of 1117 + * the window represents a corresponding physical memory page of the range 1118 + * represented by pinned_pages; the length of the window is the same as the 1119 + * length of range represented by pinned_pages. A successful 1120 + * scif_register_pinned_pages() call returns po as the return value. 1121 + * 1122 + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset 1123 + * exactly, and offset is constrained to be a multiple of the page size. The 1124 + * mapping established by scif_register_pinned_pages() will not replace any 1125 + * existing registration; an error is returned if any page of the new window 1126 + * would intersect an existing window. 1127 + * 1128 + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an 1129 + * implementation-defined manner to arrive at po. The po so chosen will be an 1130 + * area of the registered address space that the implementation deems suitable 1131 + * for a mapping of the required size. An offset value of 0 is interpreted as 1132 + * granting the implementation complete freedom in selecting po, subject to 1133 + * constraints described below. A non-zero value of offset is taken to be a 1134 + * suggestion of an offset near which the mapping should be placed. When the 1135 + * implementation selects a value for po, it does not replace any extant 1136 + * window. In all cases, po will be a multiple of the page size. 
1137 + * 1138 + * The physical pages which are so represented by a window are available for 1139 + * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), 1140 + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the 1141 + * physical pages represented by the window will not be reused by the memory 1142 + * subsystem for any other purpose. Note that the same physical page may be 1143 + * represented by multiple windows. 1144 + * 1145 + * Windows created by scif_register_pinned_pages() are unregistered by 1146 + * scif_unregister(). 1147 + * 1148 + * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a 1149 + * fixed offset. 1150 + * 1151 + * Return: 1152 + * Upon successful completion, scif_register_pinned_pages() returns the offset 1153 + * at which the mapping was placed (po); otherwise the negative of one of the 1154 + * following errors is returned. 1155 + * 1156 + * Errors: 1157 + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window 1158 + * would intersect an existing window 1159 + * EAGAIN - The mapping could not be performed due to lack of resources 1160 + * ECONNRESET - Connection reset by peer 1161 + * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and 1162 + * offset is not a multiple of the page size, or offset is negative 1163 + * ENODEV - The remote node is lost or existed, but is not currently in the 1164 + * network since it may have crashed 1165 + * ENOMEM - Not enough space 1166 + * ENOTCONN - The endpoint is not connected 1167 + */ 1168 + off_t scif_register_pinned_pages(scif_epd_t epd, 1169 + scif_pinned_pages_t pinned_pages, 1170 + off_t offset, int map_flags); 1171 + 1172 + /** 1173 + * scif_get_pages() - Add references to remote registered pages 1174 + * @epd: endpoint descriptor 1175 + * @offset: remote registered offset 1176 + * @len: length of range of pages 1177 + * @pages: returned scif_range structure 1178 + * 1179 + * scif_get_pages() 
returns the addresses of the physical pages represented by 1180 + * those pages of the registered address space of the peer of epd, starting at 1181 + * offset and continuing for len bytes. offset and len are constrained to be 1182 + * multiples of the page size. 1183 + * 1184 + * All of the pages in the specified range [offset, offset + len - 1] must be 1185 + * within a single window of the registered address space of the peer of epd. 1186 + * 1187 + * The addresses are returned as a virtually contiguous array pointed to by the 1188 + * phys_addr component of the scif_range structure whose address is returned in 1189 + * pages. The nr_pages component of scif_range is the length of the array. The 1190 + * prot_flags component of scif_range holds the protection flag value passed 1191 + * when the pages were registered. 1192 + * 1193 + * Each physical page whose address is returned by scif_get_pages() remains 1194 + * available and will not be released for reuse until the scif_range structure 1195 + * is returned in a call to scif_put_pages(). The scif_range structure returned 1196 + * by scif_get_pages() must be unmodified. 1197 + * 1198 + * It is an error to call scif_close() on an endpoint on which a scif_range 1199 + * structure of that endpoint has not been returned to scif_put_pages(). 1200 + * 1201 + * Return: 1202 + * Upon successful completion, scif_get_pages() returns 0; otherwise the 1203 + * negative of one of the following errors is returned. 1204 + * Errors: 1205 + * ECONNRESET - Connection reset by peer. 
1206 + * EINVAL - offset is not a multiple of the page size, or offset is negative, or 1207 + * len is not a multiple of the page size 1208 + * ENODEV - The remote node is lost or existed, but is not currently in the 1209 + * network since it may have crashed 1210 + * ENOTCONN - The endpoint is not connected 1211 + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid 1212 + * for the registered address space of the peer epd 1213 + */ 1214 + int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, 1215 + struct scif_range **pages); 1216 + 1217 + /** 1218 + * scif_put_pages() - Remove references from remote registered pages 1219 + * @pages: pages to be returned 1220 + * 1221 + * scif_put_pages() releases a scif_range structure previously obtained by 1222 + * calling scif_get_pages(). The physical pages represented by pages may 1223 + * be reused when the window which represented those pages is unregistered. 1224 + * Therefore, those pages must not be accessed after calling scif_put_pages(). 1225 + * 1226 + * Return: 1227 + * Upon successful completion, scif_put_pages() returns 0; otherwise the 1228 + * negative of one of the following errors is returned. 1229 + * Errors: 1230 + * EINVAL - pages does not point to a valid scif_range structure, or 1231 + * the scif_range structure pointed to by pages was already returned 1232 + * ENODEV - The remote node is lost or existed, but is not currently in the 1233 + * network since it may have crashed 1234 + * ENOTCONN - The endpoint is not connected 1235 + */ 1236 + int scif_put_pages(struct scif_range *pages); 1049 1237 1050 1238 /** 1051 1239 * scif_poll() - Wait for some event on an endpoint
+85
include/uapi/linux/scif_ioctl.h
··· 107 107 }; 108 108 109 109 /** 110 + * struct scifioctl_reg - used for SCIF_REG IOCTL 111 + * @addr: starting virtual address 112 + * @len: length of range 113 + * @offset: offset of window 114 + * @prot: read/write protection 115 + * @flags: flags 116 + * @out_offset: offset returned 117 + */ 118 + struct scifioctl_reg { 119 + __u64 addr; 120 + __u64 len; 121 + __s64 offset; 122 + __s32 prot; 123 + __s32 flags; 124 + __s64 out_offset; 125 + }; 126 + 127 + /** 128 + * struct scifioctl_unreg - used for SCIF_UNREG IOCTL 129 + * @offset: start of range to unregister 130 + * @len: length of range to unregister 131 + */ 132 + struct scifioctl_unreg { 133 + __s64 offset; 134 + __u64 len; 135 + }; 136 + 137 + /** 138 + * struct scifioctl_copy - used for SCIF DMA copy IOCTLs 139 + * 140 + * @loffset: offset in local registered address space to/from 141 + * which to copy 142 + * @len: length of range to copy 143 + * @roffset: offset in remote registered address space to/from 144 + * which to copy 145 + * @addr: user virtual address to/from which to copy 146 + * @flags: flags 147 + * 148 + * This structure is used for SCIF_READFROM, SCIF_WRITETO, SCIF_VREADFROM 149 + * and SCIF_VWRITETO IOCTLs. 
150 + */ 151 + struct scifioctl_copy { 152 + __s64 loffset; 153 + __u64 len; 154 + __s64 roffset; 155 + __u64 addr; 156 + __s32 flags; 157 + }; 158 + 159 + /** 160 + * struct scifioctl_fence_mark - used for SCIF_FENCE_MARK IOCTL 161 + * @flags: flags 162 + * @mark: fence handle which is a pointer to a __s32 163 + */ 164 + struct scifioctl_fence_mark { 165 + __s32 flags; 166 + __u64 mark; 167 + }; 168 + 169 + /** 170 + * struct scifioctl_fence_signal - used for SCIF_FENCE_SIGNAL IOCTL 171 + * @loff: local offset 172 + * @lval: value to write to loff 173 + * @roff: remote offset 174 + * @rval: value to write to roff 175 + * @flags: flags 176 + */ 177 + struct scifioctl_fence_signal { 178 + __s64 loff; 179 + __u64 lval; 180 + __s64 roff; 181 + __u64 rval; 182 + __s32 flags; 183 + }; 184 + 185 + /** 110 186 * struct scifioctl_node_ids - used for SCIF_GET_NODEIDS IOCTL 111 187 * @nodes: pointer to an array of node_ids 112 188 * @self: ID of the current node ··· 201 125 #define SCIF_ACCEPTREG _IOWR('s', 5, __u64) 202 126 #define SCIF_SEND _IOWR('s', 6, struct scifioctl_msg) 203 127 #define SCIF_RECV _IOWR('s', 7, struct scifioctl_msg) 128 + #define SCIF_REG _IOWR('s', 8, struct scifioctl_reg) 129 + #define SCIF_UNREG _IOWR('s', 9, struct scifioctl_unreg) 130 + #define SCIF_READFROM _IOWR('s', 10, struct scifioctl_copy) 131 + #define SCIF_WRITETO _IOWR('s', 11, struct scifioctl_copy) 132 + #define SCIF_VREADFROM _IOWR('s', 12, struct scifioctl_copy) 133 + #define SCIF_VWRITETO _IOWR('s', 13, struct scifioctl_copy) 204 134 #define SCIF_GET_NODEIDS _IOWR('s', 14, struct scifioctl_node_ids) 135 + #define SCIF_FENCE_MARK _IOWR('s', 15, struct scifioctl_fence_mark) 136 + #define SCIF_FENCE_WAIT _IOWR('s', 16, __s32) 137 + #define SCIF_FENCE_SIGNAL _IOWR('s', 17, struct scifioctl_fence_signal) 138 206 139 #endif /* SCIF_IOCTL_H */