Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

misc: mic: SCIF RMA nodeqp and minor miscellaneous changes

This patch adds the SCIF kernel node QP control messages required to
enable SCIF RMAs. Examples of such node QP control messages include
registration, unregistration, remote memory allocation requests,
remote memory unmap and SCIF remote fence requests.

The patch also updates the SCIF driver with minor changes required to
enable SCIF RMAs by adding the new files to the build, initializing
RMA specific information during SCIF endpoint creation, reserving SCIF
DMA channels, initializing SCIF RMA specific global data structures,
adding the IOCTL hooks required for SCIF RMAs and updating RMA
specific debugfs hooks.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Authored by Sudeep Dutt and committed by Greg Kroah-Hartman
(commit d1824329, parent 564c8d8d)

+516 -40
+1
drivers/misc/mic/Kconfig
··· 75 75 config SCIF 76 76 tristate "SCIF Driver" 77 77 depends on 64BIT && PCI && X86 && SCIF_BUS 78 + select IOMMU_IOVA 78 79 help 79 80 This enables SCIF Driver support for the Intel Many Integrated 80 81 Core (MIC) family of PCIe form factor coprocessor devices that
+5
drivers/misc/mic/scif/Makefile
··· 13 13 scif-objs += scif_rb.o 14 14 scif-objs += scif_nodeqp.o 15 15 scif-objs += scif_nm.o 16 + scif-objs += scif_dma.o 17 + scif-objs += scif_fence.o 18 + scif-objs += scif_mmap.o 19 + scif-objs += scif_rma.o 20 + scif-objs += scif_rma_list.o
+27 -6
drivers/misc/mic/scif/scif_api.c
··· 70 70 mutex_init(&ep->sendlock); 71 71 mutex_init(&ep->recvlock); 72 72 73 + scif_rma_ep_init(ep); 73 74 ep->state = SCIFEP_UNBOUND; 74 75 dev_dbg(scif_info.mdev.this_device, 75 76 "SCIFAPI open: ep %p success\n", ep); ··· 185 184 186 185 switch (oldstate) { 187 186 case SCIFEP_ZOMBIE: 187 + dev_err(scif_info.mdev.this_device, 188 + "SCIFAPI close: zombie state unexpected\n"); 188 189 case SCIFEP_DISCONNECTED: 189 190 spin_unlock(&ep->lock); 191 + scif_unregister_all_windows(epd); 190 192 /* Remove from the disconnected list */ 191 193 mutex_lock(&scif_info.connlock); 192 194 list_for_each_safe(pos, tmpq, &scif_info.disconnected) { ··· 211 207 case SCIFEP_CLOSING: 212 208 { 213 209 spin_unlock(&ep->lock); 210 + scif_unregister_all_windows(epd); 214 211 scif_disconnect_ep(ep); 215 212 break; 216 213 } ··· 223 218 struct scif_endpt *aep; 224 219 225 220 spin_unlock(&ep->lock); 226 - spin_lock(&scif_info.eplock); 221 + mutex_lock(&scif_info.eplock); 227 222 228 223 /* remove from listen list */ 229 224 list_for_each_safe(pos, tmpq, &scif_info.listen) { ··· 245 240 break; 246 241 } 247 242 } 248 - spin_unlock(&scif_info.eplock); 243 + mutex_unlock(&scif_info.eplock); 249 244 mutex_lock(&scif_info.connlock); 250 245 list_for_each_safe(pos, tmpq, &scif_info.connected) { 251 246 tmpep = list_entry(pos, ··· 265 260 } 266 261 mutex_unlock(&scif_info.connlock); 267 262 scif_teardown_ep(aep); 268 - spin_lock(&scif_info.eplock); 263 + mutex_lock(&scif_info.eplock); 269 264 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); 270 265 ep->acceptcnt--; 271 266 } 272 267 273 268 spin_lock(&ep->lock); 274 - spin_unlock(&scif_info.eplock); 269 + mutex_unlock(&scif_info.eplock); 275 270 276 271 /* Remove and reject any pending connection requests. 
*/ 277 272 while (ep->conreqcnt) { ··· 433 428 scif_teardown_ep(ep); 434 429 ep->qp_info.qp = NULL; 435 430 436 - spin_lock(&scif_info.eplock); 431 + mutex_lock(&scif_info.eplock); 437 432 list_add_tail(&ep->list, &scif_info.listen); 438 - spin_unlock(&scif_info.eplock); 433 + mutex_unlock(&scif_info.eplock); 439 434 return 0; 440 435 } 441 436 EXPORT_SYMBOL_GPL(scif_listen); ··· 474 469 struct scifmsg msg; 475 470 struct device *spdev; 476 471 472 + err = scif_reserve_dma_chan(ep); 473 + if (err) { 474 + dev_err(&ep->remote_dev->sdev->dev, 475 + "%s %d err %d\n", __func__, __LINE__, err); 476 + ep->state = SCIFEP_BOUND; 477 + goto connect_error_simple; 478 + } 477 479 /* Initiate the first part of the endpoint QP setup */ 478 480 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, 479 481 SCIF_ENDPT_QP_SIZE, ep->remote_dev); ··· 815 803 cep->state = SCIFEP_CONNECTING; 816 804 cep->remote_dev = &scif_dev[peer->node]; 817 805 cep->remote_ep = conreq->msg.payload[0]; 806 + 807 + scif_rma_ep_init(cep); 808 + 809 + err = scif_reserve_dma_chan(cep); 810 + if (err) { 811 + dev_err(scif_info.mdev.this_device, 812 + "%s %d err %d\n", __func__, __LINE__, err); 813 + goto scif_accept_error_qpalloc; 814 + } 818 815 819 816 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); 820 817 if (!cep->qp_info.qp) {
+81 -4
drivers/misc/mic/scif/scif_debugfs.c
··· 62 62 .release = scif_dev_test_release 63 63 }; 64 64 65 + static void scif_display_window(struct scif_window *window, struct seq_file *s) 66 + { 67 + int j; 68 + struct scatterlist *sg; 69 + scif_pinned_pages_t pin = window->pinned_pages; 70 + 71 + seq_printf(s, "window %p type %d temp %d offset 0x%llx ", 72 + window, window->type, window->temp, window->offset); 73 + seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ", 74 + window->nr_pages, window->nr_contig_chunks, window->prot); 75 + seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ", 76 + window->ref_count, window->magic, window->peer_window); 77 + seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n", 78 + window->unreg_state, window->va_for_temp); 79 + 80 + for (j = 0; j < window->nr_contig_chunks; j++) 81 + seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j, 82 + window->dma_addr[j], window->num_pages[j]); 83 + 84 + if (window->type == SCIF_WINDOW_SELF && pin) 85 + for (j = 0; j < window->nr_pages; j++) 86 + seq_printf(s, "page[%d] = pinned_pages %p address %p\n", 87 + j, pin->pages[j], 88 + page_address(pin->pages[j])); 89 + 90 + if (window->st) 91 + for_each_sg(window->st->sgl, sg, window->st->nents, j) 92 + seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n", 93 + j, sg_dma_address(sg), sg_dma_len(sg)); 94 + } 95 + 96 + static void scif_display_all_windows(struct list_head *head, struct seq_file *s) 97 + { 98 + struct list_head *item; 99 + struct scif_window *window; 100 + 101 + list_for_each(item, head) { 102 + window = list_entry(item, struct scif_window, list); 103 + scif_display_window(window, s); 104 + } 105 + } 106 + 107 + static int scif_rma_test(struct seq_file *s, void *unused) 108 + { 109 + struct scif_endpt *ep; 110 + struct list_head *pos; 111 + 112 + mutex_lock(&scif_info.connlock); 113 + list_for_each(pos, &scif_info.connected) { 114 + ep = list_entry(pos, struct scif_endpt, list); 115 + seq_printf(s, "ep %p self windows\n", ep); 116 + 
mutex_lock(&ep->rma_info.rma_lock); 117 + scif_display_all_windows(&ep->rma_info.reg_list, s); 118 + seq_printf(s, "ep %p remote windows\n", ep); 119 + scif_display_all_windows(&ep->rma_info.remote_reg_list, s); 120 + mutex_unlock(&ep->rma_info.rma_lock); 121 + } 122 + mutex_unlock(&scif_info.connlock); 123 + return 0; 124 + } 125 + 126 + static int scif_rma_test_open(struct inode *inode, struct file *file) 127 + { 128 + return single_open(file, scif_rma_test, inode->i_private); 129 + } 130 + 131 + static int scif_rma_test_release(struct inode *inode, struct file *file) 132 + { 133 + return single_release(inode, file); 134 + } 135 + 136 + static const struct file_operations scif_rma_ops = { 137 + .owner = THIS_MODULE, 138 + .open = scif_rma_test_open, 139 + .read = seq_read, 140 + .llseek = seq_lseek, 141 + .release = scif_rma_test_release 142 + }; 143 + 65 144 void __init scif_init_debugfs(void) 66 145 { 67 - struct dentry *d; 68 - 69 146 scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); 70 147 if (!scif_dbg) { 71 148 dev_err(scif_info.mdev.this_device, ··· 150 73 return; 151 74 } 152 75 153 - d = debugfs_create_file("scif_dev", 0444, scif_dbg, 154 - NULL, &scif_dev_ops); 76 + debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); 77 + debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); 155 78 debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); 156 79 debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); 157 80 }
+15 -11
drivers/misc/mic/scif/scif_epd.c
··· 65 65 void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) 66 66 { 67 67 if (!eplock_held) 68 - spin_lock(&scif_info.eplock); 68 + mutex_lock(&scif_info.eplock); 69 69 spin_lock(&ep->lock); 70 70 ep->state = SCIFEP_ZOMBIE; 71 71 spin_unlock(&ep->lock); 72 72 list_add_tail(&ep->list, &scif_info.zombie); 73 73 scif_info.nr_zombies++; 74 74 if (!eplock_held) 75 - spin_unlock(&scif_info.eplock); 75 + mutex_unlock(&scif_info.eplock); 76 76 schedule_work(&scif_info.misc_work); 77 77 } 78 78 ··· 81 81 struct scif_endpt *ep = NULL; 82 82 struct list_head *pos, *tmpq; 83 83 84 - spin_lock(&scif_info.eplock); 84 + mutex_lock(&scif_info.eplock); 85 85 list_for_each_safe(pos, tmpq, &scif_info.listen) { 86 86 ep = list_entry(pos, struct scif_endpt, list); 87 87 if (ep->port.port == port) { 88 - spin_lock(&ep->lock); 89 - spin_unlock(&scif_info.eplock); 88 + mutex_unlock(&scif_info.eplock); 90 89 return ep; 91 90 } 92 91 } 93 - spin_unlock(&scif_info.eplock); 92 + mutex_unlock(&scif_info.eplock); 94 93 return NULL; 95 94 } 96 95 ··· 98 99 struct list_head *pos, *tmpq; 99 100 struct scif_endpt *ep; 100 101 101 - spin_lock(&scif_info.eplock); 102 + mutex_lock(&scif_info.eplock); 102 103 list_for_each_safe(pos, tmpq, &scif_info.zombie) { 103 104 ep = list_entry(pos, struct scif_endpt, list); 104 - list_del(pos); 105 - scif_info.nr_zombies--; 106 - kfree(ep); 105 + if (scif_rma_ep_can_uninit(ep)) { 106 + list_del(pos); 107 + scif_info.nr_zombies--; 108 + put_iova_domain(&ep->rma_info.iovad); 109 + kfree(ep); 110 + } 107 111 } 108 - spin_unlock(&scif_info.eplock); 112 + mutex_unlock(&scif_info.eplock); 109 113 } 110 114 111 115 /** ··· 139 137 if (!ep) 140 138 /* Send reject due to no listening ports */ 141 139 goto conreq_sendrej_free; 140 + else 141 + spin_lock(&ep->lock); 142 142 143 143 if (ep->backlog <= ep->conreqcnt) { 144 144 /* Send reject due to too many pending requests */
+28
drivers/misc/mic/scif/scif_epd.h
··· 98 98 * @conn_async_state: Async connection 99 99 * @conn_pend_wq: Used by poll while waiting for incoming connections 100 100 * @conn_list: List of async connection requests 101 + * @rma_info: Information for triggering SCIF RMA and DMA operations 102 + * @mmu_list: link to list of MMU notifier cleanup work 101 103 * @anon: anonymous file for use in kernel mode scif poll 102 104 */ 103 105 struct scif_endpt { ··· 131 129 int conn_async_state; 132 130 wait_queue_head_t conn_pend_wq; 133 131 struct list_head conn_list; 132 + struct scif_endpt_rma_info rma_info; 133 + struct list_head mmu_list; 134 134 struct file *anon; 135 135 }; 136 136 137 137 static inline int scifdev_alive(struct scif_endpt *ep) 138 138 { 139 139 return _scifdev_alive(ep->remote_dev); 140 + } 141 + 142 + /* 143 + * scif_verify_epd: 144 + * ep: SCIF endpoint 145 + * 146 + * Checks several generic error conditions and returns the 147 + * appropriate error. 148 + */ 149 + static inline int scif_verify_epd(struct scif_endpt *ep) 150 + { 151 + if (ep->state == SCIFEP_DISCONNECTED) 152 + return -ECONNRESET; 153 + 154 + if (ep->state != SCIFEP_CONNECTED) 155 + return -ENOTCONN; 156 + 157 + if (!scifdev_alive(ep)) 158 + return -ENODEV; 159 + 160 + return 0; 140 161 } 141 162 142 163 static inline int scif_anon_inode_getfile(scif_epd_t epd) ··· 202 177 void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); 203 178 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); 204 179 int __scif_flush(scif_epd_t epd); 180 + int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); 205 181 unsigned int __scif_pollfd(struct file *f, poll_table *wait, 206 182 struct scif_endpt *ep); 183 + int __scif_pin_pages(void *addr, size_t len, int *out_prot, 184 + int map_flags, scif_pinned_pages_t *pages); 207 185 #endif /* SCIF_EPD_H */
+164 -5
drivers/misc/mic/scif/scif_fd.c
··· 34 34 return scif_close(priv); 35 35 } 36 36 37 + static int scif_fdmmap(struct file *f, struct vm_area_struct *vma) 38 + { 39 + struct scif_endpt *priv = f->private_data; 40 + 41 + return scif_mmap(vma, priv); 42 + } 43 + 37 44 static unsigned int scif_fdpoll(struct file *f, poll_table *wait) 38 45 { 39 46 struct scif_endpt *priv = f->private_data; ··· 154 147 * Add to the list of user mode eps where the second half 155 148 * of the accept is not yet completed. 156 149 */ 157 - spin_lock(&scif_info.eplock); 150 + mutex_lock(&scif_info.eplock); 158 151 list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); 159 152 list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); 160 153 (*ep)->listenep = priv; 161 154 priv->acceptcnt++; 162 - spin_unlock(&scif_info.eplock); 155 + mutex_unlock(&scif_info.eplock); 163 156 164 157 return 0; 165 158 } ··· 177 170 return -EFAULT; 178 171 179 172 /* Remove form the user accept queue */ 180 - spin_lock(&scif_info.eplock); 173 + mutex_lock(&scif_info.eplock); 181 174 list_for_each_safe(pos, tmpq, &scif_info.uaccept) { 182 175 tmpep = list_entry(pos, 183 176 struct scif_endpt, miacceptlist); ··· 189 182 } 190 183 191 184 if (!fep) { 192 - spin_unlock(&scif_info.eplock); 185 + mutex_unlock(&scif_info.eplock); 193 186 return -ENOENT; 194 187 } 195 188 ··· 204 197 } 205 198 } 206 199 207 - spin_unlock(&scif_info.eplock); 200 + mutex_unlock(&scif_info.eplock); 208 201 209 202 /* Free the resources automatically created from the open. 
*/ 210 203 scif_anon_inode_fput(priv); ··· 305 298 getnodes_err2: 306 299 return err; 307 300 } 301 + case SCIF_REG: 302 + { 303 + struct scif_endpt *priv = f->private_data; 304 + struct scifioctl_reg reg; 305 + off_t ret; 306 + 307 + if (copy_from_user(&reg, argp, sizeof(reg))) { 308 + err = -EFAULT; 309 + goto reg_err; 310 + } 311 + if (reg.flags & SCIF_MAP_KERNEL) { 312 + err = -EINVAL; 313 + goto reg_err; 314 + } 315 + ret = scif_register(priv, (void *)reg.addr, reg.len, 316 + reg.offset, reg.prot, reg.flags); 317 + if (ret < 0) { 318 + err = (int)ret; 319 + goto reg_err; 320 + } 321 + 322 + if (copy_to_user(&((struct scifioctl_reg __user *)argp) 323 + ->out_offset, &ret, sizeof(reg.out_offset))) { 324 + err = -EFAULT; 325 + goto reg_err; 326 + } 327 + err = 0; 328 + reg_err: 329 + scif_err_debug(err, "scif_register"); 330 + return err; 331 + } 332 + case SCIF_UNREG: 333 + { 334 + struct scif_endpt *priv = f->private_data; 335 + struct scifioctl_unreg unreg; 336 + 337 + if (copy_from_user(&unreg, argp, sizeof(unreg))) { 338 + err = -EFAULT; 339 + goto unreg_err; 340 + } 341 + err = scif_unregister(priv, unreg.offset, unreg.len); 342 + unreg_err: 343 + scif_err_debug(err, "scif_unregister"); 344 + return err; 345 + } 346 + case SCIF_READFROM: 347 + { 348 + struct scif_endpt *priv = f->private_data; 349 + struct scifioctl_copy copy; 350 + 351 + if (copy_from_user(&copy, argp, sizeof(copy))) { 352 + err = -EFAULT; 353 + goto readfrom_err; 354 + } 355 + err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset, 356 + copy.flags); 357 + readfrom_err: 358 + scif_err_debug(err, "scif_readfrom"); 359 + return err; 360 + } 361 + case SCIF_WRITETO: 362 + { 363 + struct scif_endpt *priv = f->private_data; 364 + struct scifioctl_copy copy; 365 + 366 + if (copy_from_user(&copy, argp, sizeof(copy))) { 367 + err = -EFAULT; 368 + goto writeto_err; 369 + } 370 + err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset, 371 + copy.flags); 372 + writeto_err: 373 + 
scif_err_debug(err, "scif_writeto"); 374 + return err; 375 + } 376 + case SCIF_VREADFROM: 377 + { 378 + struct scif_endpt *priv = f->private_data; 379 + struct scifioctl_copy copy; 380 + 381 + if (copy_from_user(&copy, argp, sizeof(copy))) { 382 + err = -EFAULT; 383 + goto vreadfrom_err; 384 + } 385 + err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len, 386 + copy.roffset, copy.flags); 387 + vreadfrom_err: 388 + scif_err_debug(err, "scif_vreadfrom"); 389 + return err; 390 + } 391 + case SCIF_VWRITETO: 392 + { 393 + struct scif_endpt *priv = f->private_data; 394 + struct scifioctl_copy copy; 395 + 396 + if (copy_from_user(&copy, argp, sizeof(copy))) { 397 + err = -EFAULT; 398 + goto vwriteto_err; 399 + } 400 + err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len, 401 + copy.roffset, copy.flags); 402 + vwriteto_err: 403 + scif_err_debug(err, "scif_vwriteto"); 404 + return err; 405 + } 406 + case SCIF_FENCE_MARK: 407 + { 408 + struct scif_endpt *priv = f->private_data; 409 + struct scifioctl_fence_mark mark; 410 + int tmp_mark = 0; 411 + 412 + if (copy_from_user(&mark, argp, sizeof(mark))) { 413 + err = -EFAULT; 414 + goto fence_mark_err; 415 + } 416 + err = scif_fence_mark(priv, mark.flags, &tmp_mark); 417 + if (err) 418 + goto fence_mark_err; 419 + if (copy_to_user((void __user *)mark.mark, &tmp_mark, 420 + sizeof(tmp_mark))) { 421 + err = -EFAULT; 422 + goto fence_mark_err; 423 + } 424 + fence_mark_err: 425 + scif_err_debug(err, "scif_fence_mark"); 426 + return err; 427 + } 428 + case SCIF_FENCE_WAIT: 429 + { 430 + struct scif_endpt *priv = f->private_data; 431 + 432 + err = scif_fence_wait(priv, arg); 433 + scif_err_debug(err, "scif_fence_wait"); 434 + return err; 435 + } 436 + case SCIF_FENCE_SIGNAL: 437 + { 438 + struct scif_endpt *priv = f->private_data; 439 + struct scifioctl_fence_signal signal; 440 + 441 + if (copy_from_user(&signal, argp, sizeof(signal))) { 442 + err = -EFAULT; 443 + goto fence_signal_err; 444 + } 445 + 446 + err = 
scif_fence_signal(priv, signal.loff, signal.lval, 447 + signal.roff, signal.rval, signal.flags); 448 + fence_signal_err: 449 + scif_err_debug(err, "scif_fence_signal"); 450 + return err; 451 + } 308 452 } 309 453 return -EINVAL; 310 454 } ··· 464 306 .open = scif_fdopen, 465 307 .release = scif_fdclose, 466 308 .unlocked_ioctl = scif_fdioctl, 309 + .mmap = scif_fdmmap, 467 310 .poll = scif_fdpoll, 468 311 .flush = scif_fdflush, 469 312 .owner = THIS_MODULE,
+22 -1
drivers/misc/mic/scif/scif_main.c
··· 34 34 }; 35 35 36 36 struct scif_dev *scif_dev; 37 + struct kmem_cache *unaligned_cache; 37 38 static atomic_t g_loopb_cnt; 38 39 39 40 /* Runs in the context of intr_wq */ ··· 264 263 { 265 264 int rc; 266 265 267 - spin_lock_init(&scif_info.eplock); 266 + mutex_init(&scif_info.eplock); 267 + spin_lock_init(&scif_info.rmalock); 268 268 spin_lock_init(&scif_info.nb_connect_lock); 269 269 spin_lock_init(&scif_info.port_lock); 270 270 mutex_init(&scif_info.conflock); 271 271 mutex_init(&scif_info.connlock); 272 + mutex_init(&scif_info.fencelock); 272 273 INIT_LIST_HEAD(&scif_info.uaccept); 273 274 INIT_LIST_HEAD(&scif_info.listen); 274 275 INIT_LIST_HEAD(&scif_info.zombie); 275 276 INIT_LIST_HEAD(&scif_info.connected); 276 277 INIT_LIST_HEAD(&scif_info.disconnected); 278 + INIT_LIST_HEAD(&scif_info.rma); 279 + INIT_LIST_HEAD(&scif_info.rma_tc); 280 + INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup); 281 + INIT_LIST_HEAD(&scif_info.fence); 277 282 INIT_LIST_HEAD(&scif_info.nb_connect_list); 278 283 init_waitqueue_head(&scif_info.exitwq); 284 + scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; 279 285 scif_info.en_msg_log = 0; 280 286 scif_info.p2p_enable = 1; 281 287 rc = scif_setup_scifdev(); 282 288 if (rc) 283 289 goto error; 290 + unaligned_cache = kmem_cache_create("Unaligned_DMA", 291 + SCIF_KMEM_UNALIGNED_BUF_SIZE, 292 + 0, SLAB_HWCACHE_ALIGN, NULL); 293 + if (!unaligned_cache) { 294 + rc = -ENOMEM; 295 + goto free_sdev; 296 + } 284 297 INIT_WORK(&scif_info.misc_work, scif_misc_handler); 298 + INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler); 285 299 INIT_WORK(&scif_info.conn_work, scif_conn_handler); 286 300 idr_init(&scif_ports); 287 301 return 0; 302 + free_sdev: 303 + scif_destroy_scifdev(); 288 304 error: 289 305 return rc; 290 306 } ··· 309 291 static void _scif_exit(void) 310 292 { 311 293 idr_destroy(&scif_ports); 294 + kmem_cache_destroy(unaligned_cache); 312 295 scif_destroy_scifdev(); 313 296 } 314 297 ··· 319 300 int rc; 320 301 321 
302 _scif_init(); 303 + iova_cache_get(); 322 304 rc = scif_peer_bus_init(); 323 305 if (rc) 324 306 goto exit; ··· 346 326 misc_deregister(&scif_info.mdev); 347 327 scif_unregister_driver(&scif_driver); 348 328 scif_peer_bus_exit(); 329 + iova_cache_put(); 349 330 _scif_exit(); 350 331 } 351 332
+28 -2
drivers/misc/mic/scif/scif_main.h
··· 22 22 #include <linux/pci.h> 23 23 #include <linux/miscdevice.h> 24 24 #include <linux/dmaengine.h> 25 + #include <linux/iova.h> 25 26 #include <linux/anon_inodes.h> 26 27 #include <linux/file.h> 28 + #include <linux/vmalloc.h> 27 29 #include <linux/scif.h> 28 - 29 30 #include "../common/mic_dev.h" 30 31 31 32 #define SCIF_MGMT_NODE 0 32 33 #define SCIF_DEFAULT_WATCHDOG_TO 30 33 34 #define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) 34 35 #define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) 36 + #define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 35 37 36 38 /* 37 39 * Generic state used for certain node QP message exchanges ··· 76 74 * @loopb_work: Used for submitting work to loopb_wq 77 75 * @loopb_recv_q: List of messages received on the loopb_wq 78 76 * @card_initiated_exit: set when the card has initiated the exit 77 + * @rmalock: Synchronize access to RMA operations 78 + * @fencelock: Synchronize access to list of remote fences requested. 79 + * @rma: List of temporary registered windows to be destroyed. 
80 + * @rma_tc: List of temporary registered & cached Windows to be destroyed 81 + * @fence: List of remote fence requests 82 + * @mmu_notif_work: Work for registration caching MMU notifier workqueue 83 + * @mmu_notif_cleanup: List of temporary cached windows for reg cache 84 + * @rma_tc_limit: RMA temporary cache limit 79 85 */ 80 86 struct scif_info { 81 87 u8 nodeid; 82 88 u8 maxid; 83 89 u8 total; 84 90 u32 nr_zombies; 85 - spinlock_t eplock; 91 + struct mutex eplock; 86 92 struct mutex connlock; 87 93 spinlock_t nb_connect_lock; 88 94 spinlock_t port_lock; ··· 113 103 struct work_struct loopb_work; 114 104 struct list_head loopb_recv_q; 115 105 bool card_initiated_exit; 106 + spinlock_t rmalock; 107 + struct mutex fencelock; 108 + struct list_head rma; 109 + struct list_head rma_tc; 110 + struct list_head fence; 111 + struct work_struct mmu_notif_work; 112 + struct list_head mmu_notif_cleanup; 113 + unsigned long rma_tc_limit; 116 114 }; 117 115 118 116 /* ··· 171 153 * @disconn_rescnt: Keeps track of number of node remove requests sent 172 154 * @exit: Status of exit message 173 155 * @qp_dma_addr: Queue pair DMA address passed to the peer 156 + * @dma_ch_idx: Round robin index for DMA channels 157 + * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's 174 158 */ 175 159 struct scif_dev { 176 160 u8 node; ··· 199 179 atomic_t disconn_rescnt; 200 180 enum scif_msg_state exit; 201 181 dma_addr_t qp_dma_addr; 182 + int dma_ch_idx; 183 + struct dma_pool *signal_pool; 202 184 }; 203 185 186 + extern bool scif_reg_cache_enable; 187 + extern bool scif_ulimit_check; 204 188 extern struct scif_info scif_info; 205 189 extern struct idr scif_ports; 206 190 extern struct bus_type scif_peer_bus; ··· 216 192 #define SCIF_NODE_QP_SIZE 0x10000 217 193 218 194 #include "scif_nodeqp.h" 195 + #include "scif_rma.h" 196 + #include "scif_rma_list.h" 219 197 220 198 /* 221 199 * scifdev_self:
+24 -1
drivers/misc/mic/scif/scif_map.h
··· 80 80 size_t size) 81 81 { 82 82 if (!scifdev_self(scifdev)) { 83 - if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) 83 + if (scifdev_is_p2p(scifdev)) 84 84 local = local - scifdev->base_addr; 85 85 dma_unmap_single(&scifdev->sdev->dev, local, 86 86 size, DMA_BIDIRECTIONAL); ··· 109 109 110 110 sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); 111 111 } 112 + } 113 + 114 + static __always_inline int 115 + scif_map_page(dma_addr_t *dma_handle, struct page *page, 116 + struct scif_dev *scifdev) 117 + { 118 + int err = 0; 119 + 120 + if (scifdev_self(scifdev)) { 121 + *dma_handle = page_to_phys(page); 122 + } else { 123 + struct scif_hw_dev *sdev = scifdev->sdev; 124 + *dma_handle = dma_map_page(&sdev->dev, 125 + page, 0x0, PAGE_SIZE, 126 + DMA_BIDIRECTIONAL); 127 + if (dma_mapping_error(&sdev->dev, *dma_handle)) 128 + err = -ENOMEM; 129 + else if (scifdev_is_p2p(scifdev)) 130 + *dma_handle = *dma_handle + scifdev->base_addr; 131 + } 132 + if (err) 133 + *dma_handle = 0; 134 + return err; 112 135 } 113 136 #endif /* SCIF_MAP_H */
+8 -2
drivers/misc/mic/scif/scif_nm.c
··· 34 34 list_for_each_safe(pos, tmpq, &scif_info.disconnected) { 35 35 ep = list_entry(pos, struct scif_endpt, list); 36 36 if (ep->remote_dev->node == node) { 37 + scif_unmap_all_windows(ep); 37 38 spin_lock(&ep->lock); 38 39 scif_cleanup_ep_qp(ep); 39 40 spin_unlock(&ep->lock); ··· 51 50 wake_up_interruptible(&ep->sendwq); 52 51 wake_up_interruptible(&ep->recvwq); 53 52 spin_unlock(&ep->lock); 53 + scif_unmap_all_windows(ep); 54 54 } 55 55 } 56 56 mutex_unlock(&scif_info.connlock); ··· 63 61 64 62 if (!qp) 65 63 return; 66 - scif_free_coherent((void *)qp->inbound_q.rb_base, 67 - qp->local_buf, scifdev, qp->inbound_q.size); 64 + scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size); 65 + kfree(qp->inbound_q.rb_base); 68 66 scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); 69 67 kfree(scifdev->qpairs); 70 68 scifdev->qpairs = NULL; ··· 127 125 } 128 126 scif_destroy_intr_wq(dev); 129 127 } 128 + flush_work(&scif_info.misc_work); 130 129 scif_destroy_p2p(dev); 131 130 scif_invalidate_ep(dev->node); 131 + scif_zap_mmaps(dev->node); 132 + scif_cleanup_rma_for_zombies(dev->node); 133 + flush_work(&scif_info.misc_work); 132 134 scif_send_acks(dev); 133 135 if (!dev->node && scif_info.card_initiated_exit) { 134 136 /*
+59 -6
drivers/misc/mic/scif/scif_nodeqp.c
··· 105 105 int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, 106 106 int local_size, struct scif_dev *scifdev) 107 107 { 108 - void *local_q = NULL; 108 + void *local_q = qp->inbound_q.rb_base; 109 109 int err = 0; 110 110 u32 tmp_rd = 0; 111 111 112 112 spin_lock_init(&qp->send_lock); 113 113 spin_lock_init(&qp->recv_lock); 114 114 115 - local_q = kzalloc(local_size, GFP_KERNEL); 115 + /* Allocate rb only if not already allocated */ 116 116 if (!local_q) { 117 - err = -ENOMEM; 118 - return err; 117 + local_q = kzalloc(local_size, GFP_KERNEL); 118 + if (!local_q) { 119 + err = -ENOMEM; 120 + return err; 121 + } 119 122 } 123 + 120 124 err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); 121 125 if (err) 122 126 goto kfree; ··· 556 552 "DISCNT_ACK", 557 553 "CLIENT_SENT", 558 554 "CLIENT_RCVD", 559 - "SCIF_GET_NODE_INFO"}; 555 + "SCIF_GET_NODE_INFO", 556 + "REGISTER", 557 + "REGISTER_ACK", 558 + "REGISTER_NACK", 559 + "UNREGISTER", 560 + "UNREGISTER_ACK", 561 + "UNREGISTER_NACK", 562 + "ALLOC_REQ", 563 + "ALLOC_GNT", 564 + "ALLOC_REJ", 565 + "FREE_PHYS", 566 + "FREE_VIRT", 567 + "MUNMAP", 568 + "MARK", 569 + "MARK_ACK", 570 + "MARK_NACK", 571 + "WAIT", 572 + "WAIT_ACK", 573 + "WAIT_NACK", 574 + "SIGNAL_LOCAL", 575 + "SIGNAL_REMOTE", 576 + "SIG_ACK", 577 + "SIG_NACK"}; 560 578 561 579 static void 562 580 scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, ··· 672 646 * 673 647 * Work queue handler for servicing miscellaneous SCIF tasks. 674 648 * Examples include: 675 - * 1) Cleanup of zombie endpoints. 649 + * 1) Remote fence requests. 650 + * 2) Destruction of temporary registered windows 651 + * created during scif_vreadfrom()/scif_vwriteto(). 652 + * 3) Cleanup of zombie endpoints. 
676 653 */ 677 654 void scif_misc_handler(struct work_struct *work) 678 655 { 656 + scif_rma_handle_remote_fences(); 657 + scif_rma_destroy_windows(); 658 + scif_rma_destroy_tcw_invalid(); 679 659 scif_cleanup_zombie_epd(); 680 660 } 681 661 ··· 1027 995 scif_clientsend, /* SCIF_CLIENT_SENT */ 1028 996 scif_clientrcvd, /* SCIF_CLIENT_RCVD */ 1029 997 scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ 998 + scif_recv_reg, /* SCIF_REGISTER */ 999 + scif_recv_reg_ack, /* SCIF_REGISTER_ACK */ 1000 + scif_recv_reg_nack, /* SCIF_REGISTER_NACK */ 1001 + scif_recv_unreg, /* SCIF_UNREGISTER */ 1002 + scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */ 1003 + scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */ 1004 + scif_alloc_req, /* SCIF_ALLOC_REQ */ 1005 + scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */ 1006 + scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */ 1007 + scif_free_virt, /* SCIF_FREE_VIRT */ 1008 + scif_recv_munmap, /* SCIF_MUNMAP */ 1009 + scif_recv_mark, /* SCIF_MARK */ 1010 + scif_recv_mark_resp, /* SCIF_MARK_ACK */ 1011 + scif_recv_mark_resp, /* SCIF_MARK_NACK */ 1012 + scif_recv_wait, /* SCIF_WAIT */ 1013 + scif_recv_wait_resp, /* SCIF_WAIT_ACK */ 1014 + scif_recv_wait_resp, /* SCIF_WAIT_NACK */ 1015 + scif_recv_sig_local, /* SCIF_SIG_LOCAL */ 1016 + scif_recv_sig_remote, /* SCIF_SIG_REMOTE */ 1017 + scif_recv_sig_resp, /* SCIF_SIG_ACK */ 1018 + scif_recv_sig_resp, /* SCIF_SIG_NACK */ 1030 1019 }; 1031 1020 1032 1021 /**
+40 -2
drivers/misc/mic/scif/scif_nodeqp.h
··· 74 74 #define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ 75 75 #define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ 76 76 #define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ 77 - #define SCIF_MAX_MSG SCIF_GET_NODE_INFO 77 + #define SCIF_REGISTER 19 /* Tell peer about a new registered window */ 78 + #define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */ 79 + #define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */ 80 + #define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */ 81 + #define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */ 82 + #define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */ 83 + #define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */ 84 + #define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */ 85 + #define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */ 86 + #define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */ 87 + #define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */ 88 + #define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */ 89 + #define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */ 90 + #define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */ 91 + #define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */ 92 + #define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */ 93 + #define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */ 94 + #define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */ 95 + #define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */ 96 + #define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */ 97 + #define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */ 98 + #define SCIF_MAX_MSG SCIF_SIG_NACK 78 99 79 100 /* 80 101 * struct scifmsg - Node QP message format ··· 111 90 u32 uop; 112 91 u64 payload[4]; 113 92 } 
__packed; 93 + 94 + /* 95 + * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request 96 + * the remote note to allocate memory 97 + * 98 + * phys_addr: Physical address of the buffer 99 + * vaddr: Virtual address of the buffer 100 + * size: Size of the buffer 101 + * state: Current state 102 + * allocwq: wait queue for status 103 + */ 104 + struct scif_allocmsg { 105 + dma_addr_t phys_addr; 106 + unsigned long vaddr; 107 + size_t size; 108 + enum scif_msg_state state; 109 + wait_queue_head_t allocwq; 110 + }; 114 111 115 112 /* 116 113 * struct scif_qp - Node Queue Pair ··· 197 158 int scif_setup_loopback_qp(struct scif_dev *scifdev); 198 159 int scif_destroy_loopback_qp(struct scif_dev *scifdev); 199 160 void scif_poll_qp_state(struct work_struct *work); 200 - void scif_qp_response_ack(struct work_struct *work); 201 161 void scif_destroy_p2p(struct scif_dev *scifdev); 202 162 void scif_send_exit(struct scif_dev *scifdev); 203 163 static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+14
drivers/misc/mic/scif/scif_peer_bus.c
··· 72 72 static int scif_peer_add_device(struct scif_dev *scifdev) 73 73 { 74 74 struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); 75 + char pool_name[16]; 75 76 int ret; 76 77 77 78 ret = device_add(&spdev->dev); ··· 82 81 "dnode %d: peer device_add failed\n", scifdev->node); 83 82 goto put_spdev; 84 83 } 84 + 85 + scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode); 86 + scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev, 87 + sizeof(struct scif_status), 1, 88 + 0); 89 + if (!scifdev->signal_pool) { 90 + dev_err(&scifdev->sdev->dev, 91 + "dnode %d: dmam_pool_create failed\n", scifdev->node); 92 + ret = -ENOMEM; 93 + goto del_spdev; 94 + } 85 95 dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); 86 96 return 0; 97 + del_spdev: 98 + device_del(&spdev->dev); 87 99 put_spdev: 88 100 RCU_INIT_POINTER(scifdev->spdev, NULL); 89 101 synchronize_rcu();