Merge tag 'ntb-4.6' of git://github.com/jonmason/ntb

Pull NTB bug fixes from Jon Mason:
"NTB bug fixes for tasklet from spinning forever, link errors,
translation window setup, NULL ptr dereference, and ntb-perf errors.

Also, a modification to the driver API that makes _addr functions
optional"

* tag 'ntb-4.6' of git://github.com/jonmason/ntb:
NTB: Remove _addr functions from ntb_hw_amd
NTB: Make _addr functions optional in the API
NTB: Fix incorrect clean up routine in ntb_perf
NTB: Fix incorrect return check in ntb_perf
ntb: fix possible NULL dereference
ntb: add missing setup of translation window
ntb: stop link work when we do not have memory
ntb: stop tasklet from spinning forever during shutdown.
ntb: perf test: fix address space confusion

Changed files (+79 -70):
drivers/ntb/hw/amd/ntb_hw_amd.c (-30)
···
         return 0;
 }
 
-static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
-                                phys_addr_t *db_addr,
-                                resource_size_t *db_size)
-{
-        struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-        if (db_addr)
-                *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
-        if (db_size)
-                *db_size = sizeof(u32);
-
-        return 0;
-}
-
 static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 {
         struct amd_ntb_dev *ndev = ntb_ndev(ntb);
···
         offset = ndev->self_spad + (idx << 2);
         writel(val, mmio + AMD_SPAD_OFFSET + offset);
 
-        return 0;
-}
-
-static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
-                                  phys_addr_t *spad_addr)
-{
-        struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-        if (idx < 0 || idx >= ndev->spad_count)
-                return -EINVAL;
-
-        if (spad_addr)
-                *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
-                                           ndev->peer_spad + (idx << 2));
         return 0;
 }
···
         .db_clear = amd_ntb_db_clear,
         .db_set_mask = amd_ntb_db_set_mask,
         .db_clear_mask = amd_ntb_db_clear_mask,
-        .peer_db_addr = amd_ntb_peer_db_addr,
         .peer_db_set = amd_ntb_peer_db_set,
         .spad_count = amd_ntb_spad_count,
         .spad_read = amd_ntb_spad_read,
         .spad_write = amd_ntb_spad_write,
-        .peer_spad_addr = amd_ntb_peer_spad_addr,
         .peer_spad_read = amd_ntb_peer_spad_read,
         .peer_spad_write = amd_ntb_peer_spad_write,
 };
drivers/ntb/ntb_transport.c (+24 -7)
···
         bool client_ready;
         bool link_is_up;
+        bool active;
 
         u8 qp_num;      /* Only 64 QP's are allowed.  0-63 */
         u64 qp_bit;
···
 static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
 {
         qp->link_is_up = false;
+        qp->active = false;
 
         qp->tx_index = 0;
         qp->rx_index = 0;
···
         struct pci_dev *pdev = ndev->pdev;
         resource_size_t size;
         u32 val;
-        int rc, i, spad;
+        int rc = 0, i, spad;
 
         /* send the local info, in the opposite order of the way we read it */
         for (i = 0; i < nt->mw_count; i++) {
···
 out1:
         for (i = 0; i < nt->mw_count; i++)
                 ntb_free_mw(nt, i);
+
+        /* if there's an actual failure, we should just bail */
+        if (rc < 0) {
+                ntb_link_disable(ndev);
+                return;
+        }
+
 out:
         if (ntb_link_is_up(ndev, NULL, NULL) == 1)
                 schedule_delayed_work(&nt->link_work,
···
         if (val & BIT(qp->qp_num)) {
                 dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
                 qp->link_is_up = true;
+                qp->active = true;
 
                 if (qp->event_handler)
                         qp->event_handler(qp->cb_data, qp->link_is_up);
 
-                tasklet_schedule(&qp->rxc_db_work);
+                if (qp->active)
+                        tasklet_schedule(&qp->rxc_db_work);
         } else if (nt->link_is_up)
                 schedule_delayed_work(&qp->link_work,
                                       msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
···
         if (i == qp->rx_max_entry) {
                 /* there is more work to do */
-                tasklet_schedule(&qp->rxc_db_work);
+                if (qp->active)
+                        tasklet_schedule(&qp->rxc_db_work);
         } else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) {
                 /* the doorbell bit is set: clear it */
                 ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num));
···
          * ntb_process_rxc and clearing the doorbell bit:
          * there might be some more work to do.
          */
-        tasklet_schedule(&qp->rxc_db_work);
+        if (qp->active)
+                tasklet_schedule(&qp->rxc_db_work);
         }
 }
···
         pdev = qp->ndev->pdev;
 
+        qp->active = false;
+
         if (qp->tx_dma_chan) {
                 struct dma_chan *chan = qp->tx_dma_chan;
                 /* Putting the dma_chan to NULL will force any new traffic to be
···
         qp_bit = BIT_ULL(qp->qp_num);
 
         ntb_db_set_mask(qp->ndev, qp_bit);
-        tasklet_disable(&qp->rxc_db_work);
+        tasklet_kill(&qp->rxc_db_work);
 
         cancel_delayed_work_sync(&qp->link_work);
···
         ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
 
-        tasklet_schedule(&qp->rxc_db_work);
+        if (qp->active)
+                tasklet_schedule(&qp->rxc_db_work);
 
         return 0;
 }
···
                 qp_num = __ffs(db_bits);
                 qp = &nt->qp_vec[qp_num];
 
-                tasklet_schedule(&qp->rxc_db_work);
+                if (qp->active)
+                        tasklet_schedule(&qp->rxc_db_work);
 
                 db_bits &= ~BIT_ULL(qp_num);
         }
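
Distilled from the ntb_transport.c hunks above (illustration only, not a
drop-in; example_qp_shutdown is a hypothetical name): the shutdown fix
gates every tasklet_schedule() on the new qp->active flag, clears that
flag before teardown starts, and replaces tasklet_disable() with
tasklet_kill(), so a self-rescheduling RX tasklet cannot spin forever
once the queue is being freed.

/*
 * Sketch of the shutdown ordering; the qp fields are the ones declared in
 * drivers/ntb/ntb_transport.c, so this only makes sense inside that file.
 */
static void example_qp_shutdown(struct ntb_transport_qp *qp)
{
        u64 qp_bit = BIT_ULL(qp->qp_num);

        /* No new rx work may be scheduled from this point on. */
        qp->active = false;
        /* Mask this queue's doorbell bit so the ISR stops rescheduling us. */
        ntb_db_set_mask(qp->ndev, qp_bit);
        /* Wait out any tasklet that is already running, rather than just
         * disabling it and leaving it to spin. */
        tasklet_kill(&qp->rxc_db_work);
        cancel_delayed_work_sync(&qp->link_work);
}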
drivers/ntb/test/ntb_perf.c (+47 -31)
···
         atomic_dec(&pctx->dma_sync);
 }
 
-static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
                          char *src, size_t size)
 {
         struct perf_ctx *perf = pctx->perf;
···
         dma_cookie_t cookie;
         size_t src_off, dst_off;
         struct perf_mw *mw = &perf->mw;
-        u64 vbase, dst_vaddr;
+        void __iomem *vbase;
+        void __iomem *dst_vaddr;
         dma_addr_t dst_phys;
         int retries = 0;
···
         }
 
         device = chan->device;
-        src_off = (size_t)src & ~PAGE_MASK;
-        dst_off = (size_t)dst & ~PAGE_MASK;
+        src_off = (uintptr_t)src & ~PAGE_MASK;
+        dst_off = (uintptr_t __force)dst & ~PAGE_MASK;
 
         if (!is_dma_copy_aligned(device, src_off, dst_off, size))
                 return -ENODEV;
 
-        vbase = (u64)(u64 *)mw->vbase;
-        dst_vaddr = (u64)(u64 *)dst;
+        vbase = mw->vbase;
+        dst_vaddr = dst;
         dst_phys = mw->phys_addr + (dst_vaddr - vbase);
 
         unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
···
         return 0;
 }
 
-static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
                           u64 buf_size, u64 win_size, u64 total)
 {
         int chunks, total_chunks, i;
         int copied_chunks = 0;
         u64 copied = 0, result;
-        char *tmp = dst;
+        char __iomem *tmp = dst;
         u64 perf, diff_us;
         ktime_t kstart, kstop, kdiff;
···
         struct perf_ctx *perf = pctx->perf;
         struct pci_dev *pdev = perf->ntb->pdev;
         struct perf_mw *mw = &perf->mw;
-        char *dst;
+        char __iomem *dst;
         u64 win_size, buf_size, total;
         void *src;
         int rc, node, i;
···
         if (buf_size > MAX_TEST_SIZE)
                 buf_size = MAX_TEST_SIZE;
 
-        dst = (char *)mw->vbase;
+        dst = (char __iomem *)mw->vbase;
 
         atomic_inc(&perf->tsync);
         while (atomic_read(&perf->tsync) != perf->perf_threads)
···
 {
         struct perf_mw *mw = &perf->mw;
         size_t xlat_size, buf_size;
+        int rc;
 
         if (!size)
                 return -EINVAL;
···
         if (!mw->virt_addr) {
                 mw->xlat_size = 0;
                 mw->buf_size = 0;
+        }
+
+        rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+        if (rc) {
+                dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
+                perf_free_mw(perf);
+                return -EIO;
         }
 
         return 0;
···
                 return 0;
 
         buf = kmalloc(64, GFP_KERNEL);
+        if (!buf)
+                return -ENOMEM;
         out_offset = snprintf(buf, 64, "%d\n", perf->run);
         ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
         kfree(buf);
 
         return ret;
+}
+
+static void threads_cleanup(struct perf_ctx *perf)
+{
+        struct pthr_ctx *pctx;
+        int i;
+
+        perf->run = false;
+        for (i = 0; i < MAX_THREADS; i++) {
+                pctx = &perf->pthr_ctx[i];
+                if (pctx->thread) {
+                        kthread_stop(pctx->thread);
+                        pctx->thread = NULL;
+                }
+        }
 }
···
         if (atomic_read(&perf->tsync) == 0)
                 perf->run = false;
 
-        if (perf->run) {
-                /* lets stop the threads */
-                perf->run = false;
-                for (i = 0; i < MAX_THREADS; i++) {
-                        if (perf->pthr_ctx[i].thread) {
-                                kthread_stop(perf->pthr_ctx[i].thread);
-                                perf->pthr_ctx[i].thread = NULL;
-                        } else
-                                break;
-                }
-        } else {
+        if (perf->run)
+                threads_cleanup(perf);
+        else {
                 perf->run = true;
 
                 if (perf->perf_threads > MAX_THREADS) {
···
                                 kthread_create_on_node(ntb_perf_thread,
                                                        (void *)pctx,
                                                        node, "ntb_perf %d", i);
-                        if (pctx->thread)
+                        if (IS_ERR(pctx->thread)) {
+                                pctx->thread = NULL;
+                                goto err;
+                        } else
                                 wake_up_process(pctx->thread);
-                        else {
-                                perf->run = false;
-                                for (i = 0; i < MAX_THREADS; i++) {
-                                        if (pctx->thread) {
-                                                kthread_stop(pctx->thread);
-                                                pctx->thread = NULL;
-                                        }
-                                }
-                        }
 
                         if (perf->run == false)
                                 return -ENXIO;
···
         }
 
         return count;
+
+err:
+        threads_cleanup(perf);
+        return -ENXIO;
 }
 
 static const struct file_operations ntb_perf_debugfs_run = {
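
The ntb_perf address-space fix above is mostly type annotations: pointers
into the mapped peer window carry __iomem so sparse can tell MMIO space
from ordinary kernel memory. A hedged illustration of the same discipline
(example_fill_peer_window is a hypothetical name, not from this series):

#include <linux/io.h>

/* Copy a buffer into a peer memory window without mixing address spaces. */
static void example_fill_peer_window(void __iomem *win, const void *src,
                                     size_t len)
{
        /* memcpy_toio() is the accessor for MMIO destinations; a plain
         * memcpy() through a __iomem pointer is exactly the "address
         * space confusion" sparse complains about. */
        memcpy_toio(win, src, len);
}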
include/linux/ntb.h (+8 -2)
···
         /* ops->db_read_mask && */
         ops->db_set_mask &&
         ops->db_clear_mask &&
-        ops->peer_db_addr &&
+        /* ops->peer_db_addr && */
         /* ops->peer_db_read && */
         ops->peer_db_set &&
         /* ops->peer_db_clear && */
···
         ops->spad_count &&
         ops->spad_read &&
         ops->spad_write &&
-        ops->peer_spad_addr &&
+        /* ops->peer_spad_addr && */
         /* ops->peer_spad_read && */
         ops->peer_spad_write &&
         1;
···
                                     phys_addr_t *db_addr,
                                     resource_size_t *db_size)
 {
+        if (!ntb->ops->peer_db_addr)
+                return -EINVAL;
+
         return ntb->ops->peer_db_addr(ntb, db_addr, db_size);
 }
···
 static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
                                      phys_addr_t *spad_addr)
 {
+        if (!ntb->ops->peer_spad_addr)
+                return -EINVAL;
+
         return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
 }