Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.15-rc4 899 lines 21 kB view raw
1/* 2 * This file is provided under a dual BSD/GPLv2 license. When using or 3 * redistributing this file, you may do so under either license. 4 * 5 * GPL LICENSE SUMMARY 6 * 7 * Copyright(c) 2015 Intel Corporation. All rights reserved. 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * BSD LICENSE 14 * 15 * Copyright(c) 2015 Intel Corporation. All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 21 * * Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * * Redistributions in binary form must reproduce the above copy 24 * notice, this list of conditions and the following disclaimer in 25 * the documentation and/or other materials provided with the 26 * distribution. 27 * * Neither the name of Intel Corporation nor the names of its 28 * contributors may be used to endorse or promote products derived 29 * from this software without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 35 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 37 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 41 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 * 43 * PCIe NTB Perf Linux driver 44 */ 45 46#include <linux/init.h> 47#include <linux/kernel.h> 48#include <linux/module.h> 49#include <linux/kthread.h> 50#include <linux/time.h> 51#include <linux/timer.h> 52#include <linux/dma-mapping.h> 53#include <linux/pci.h> 54#include <linux/slab.h> 55#include <linux/spinlock.h> 56#include <linux/debugfs.h> 57#include <linux/dmaengine.h> 58#include <linux/delay.h> 59#include <linux/sizes.h> 60#include <linux/ntb.h> 61#include <linux/mutex.h> 62 63#define DRIVER_NAME "ntb_perf" 64#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" 65 66#define DRIVER_LICENSE "Dual BSD/GPL" 67#define DRIVER_VERSION "1.0" 68#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>" 69 70#define PERF_LINK_DOWN_TIMEOUT 10 71#define PERF_VERSION 0xffff0001 72#define MAX_THREADS 32 73#define MAX_TEST_SIZE SZ_1M 74#define MAX_SRCS 32 75#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) 76#define DMA_RETRIES 20 77#define SZ_4G (1ULL << 32) 78#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ 79#define PIDX NTB_DEF_PEER_IDX 80 81MODULE_LICENSE(DRIVER_LICENSE); 82MODULE_VERSION(DRIVER_VERSION); 83MODULE_AUTHOR(DRIVER_AUTHOR); 84MODULE_DESCRIPTION(DRIVER_DESCRIPTION); 85 86static struct dentry *perf_debugfs_dir; 87 88static unsigned long max_mw_size; 89module_param(max_mw_size, ulong, 0644); 90MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); 91 92static unsigned int seg_order = 19; /* 512K */ 93module_param(seg_order, uint, 0644); 94MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing"); 95 96static unsigned int run_order = 32; /* 4G */ 97module_param(run_order, uint, 0644); 98MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer"); 99 100static bool use_dma; /* default to 0 */ 101module_param(use_dma, bool, 0644); 102MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance"); 103 104static bool on_node = true; /* default to 1 */ 105module_param(on_node, bool, 0644); 106MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)"); 107 108struct perf_mw { 109 phys_addr_t phys_addr; 110 resource_size_t phys_size; 111 void __iomem *vbase; 112 size_t xlat_size; 113 size_t buf_size; 114 void *virt_addr; 115 dma_addr_t dma_addr; 116}; 117 118struct perf_ctx; 119 120struct pthr_ctx { 121 struct task_struct *thread; 122 struct perf_ctx *perf; 123 atomic_t dma_sync; 124 struct dma_chan *dma_chan; 125 int dma_prep_err; 126 int src_idx; 127 void *srcs[MAX_SRCS]; 128 wait_queue_head_t *wq; 129 int status; 130 u64 copied; 131 u64 diff_us; 132}; 133 134struct perf_ctx { 135 struct ntb_dev *ntb; 136 spinlock_t db_lock; 137 struct perf_mw mw; 138 bool link_is_up; 139 struct delayed_work link_work; 140 wait_queue_head_t link_wq; 141 u8 perf_threads; 142 /* mutex ensures only one set of threads run at once */ 143 struct mutex run_mutex; 144 struct pthr_ctx pthr_ctx[MAX_THREADS]; 145 atomic_t tsync; 146 atomic_t tdone; 147}; 148 149enum { 150 VERSION = 0, 151 MW_SZ_HIGH, 152 MW_SZ_LOW, 153 MAX_SPAD 154}; 155 156static void perf_link_event(void *ctx) 157{ 158 struct perf_ctx *perf = ctx; 159 160 if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { 161 schedule_delayed_work(&perf->link_work, 2*HZ); 162 } else { 163 dev_dbg(&perf->ntb->pdev->dev, "link down\n"); 164 165 if (!perf->link_is_up) 166 cancel_delayed_work_sync(&perf->link_work); 167 168 perf->link_is_up = false; 169 } 170} 171 172static void perf_db_event(void *ctx, int vec) 173{ 174 struct perf_ctx *perf = ctx; 175 u64 db_bits, db_mask; 176 177 db_mask = ntb_db_vector_mask(perf->ntb, vec); 178 db_bits = ntb_db_read(perf->ntb); 179 180 dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", 181 vec, db_mask, db_bits); 182} 183 184static const struct ntb_ctx_ops perf_ops = { 185 .link_event = perf_link_event, 186 .db_event = perf_db_event, 187}; 188 189static void perf_copy_callback(void *data) 190{ 191 struct pthr_ctx *pctx = data; 192 193 atomic_dec(&pctx->dma_sync); 194} 195 196static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst, 197 char *src, size_t size) 198{ 199 struct perf_ctx *perf = pctx->perf; 200 struct dma_async_tx_descriptor *txd; 201 struct dma_chan *chan = pctx->dma_chan; 202 struct dma_device *device; 203 struct dmaengine_unmap_data *unmap; 204 dma_cookie_t cookie; 205 size_t src_off, dst_off; 206 struct perf_mw *mw = &perf->mw; 207 void __iomem *vbase; 208 void __iomem *dst_vaddr; 209 dma_addr_t dst_phys; 210 int retries = 0; 211 212 if (!use_dma) { 213 memcpy_toio(dst, src, size); 214 return size; 215 } 216 217 if (!chan) { 218 dev_err(&perf->ntb->dev, "DMA engine does not exist\n"); 219 return -EINVAL; 220 } 221 222 device = chan->device; 223 src_off = (uintptr_t)src & ~PAGE_MASK; 224 dst_off = (uintptr_t __force)dst & ~PAGE_MASK; 225 226 if (!is_dma_copy_aligned(device, src_off, dst_off, size)) 227 return -ENODEV; 228 229 vbase = mw->vbase; 230 dst_vaddr = dst; 231 dst_phys = mw->phys_addr + (dst_vaddr - vbase); 232 233 unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT); 234 if (!unmap) 235 return -ENOMEM; 236 237 unmap->len = size; 238 unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src), 239 src_off, size, DMA_TO_DEVICE); 240 if (dma_mapping_error(device->dev, unmap->addr[0])) 241 goto err_get_unmap; 242 243 unmap->to_cnt = 1; 244 245 do { 246 txd = device->device_prep_dma_memcpy(chan, dst_phys, 247 unmap->addr[0], 248 size, DMA_PREP_INTERRUPT); 249 if (!txd) { 250 set_current_state(TASK_INTERRUPTIBLE); 251 schedule_timeout(DMA_OUT_RESOURCE_TO); 252 } 253 } while (!txd && (++retries < DMA_RETRIES)); 254 255 if (!txd) { 256 pctx->dma_prep_err++; 257 goto err_get_unmap; 258 } 259 260 txd->callback = perf_copy_callback; 261 txd->callback_param = pctx; 262 dma_set_unmap(txd, unmap); 263 264 cookie = dmaengine_submit(txd); 265 if (dma_submit_error(cookie)) 266 goto err_set_unmap; 267 268 dmaengine_unmap_put(unmap); 269 270 atomic_inc(&pctx->dma_sync); 271 dma_async_issue_pending(chan); 272 273 return size; 274 275err_set_unmap: 276 dmaengine_unmap_put(unmap); 277err_get_unmap: 278 dmaengine_unmap_put(unmap); 279 return 0; 280} 281 282static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, 283 u64 buf_size, u64 win_size, u64 total) 284{ 285 int chunks, total_chunks, i; 286 int copied_chunks = 0; 287 u64 copied = 0, result; 288 char __iomem *tmp = dst; 289 u64 perf, diff_us; 290 ktime_t kstart, kstop, kdiff; 291 unsigned long last_sleep = jiffies; 292 293 chunks = div64_u64(win_size, buf_size); 294 total_chunks = div64_u64(total, buf_size); 295 kstart = ktime_get(); 296 297 for (i = 0; i < total_chunks; i++) { 298 result = perf_copy(pctx, tmp, src, buf_size); 299 copied += result; 300 copied_chunks++; 301 if (copied_chunks == chunks) { 302 tmp = dst; 303 copied_chunks = 0; 304 } else 305 tmp += buf_size; 306 307 /* Probably should schedule every 5s to prevent soft hang. */ 308 if (unlikely((jiffies - last_sleep) > 5 * HZ)) { 309 last_sleep = jiffies; 310 set_current_state(TASK_INTERRUPTIBLE); 311 schedule_timeout(1); 312 } 313 314 if (unlikely(kthread_should_stop())) 315 break; 316 } 317 318 if (use_dma) { 319 pr_debug("%s: All DMA descriptors submitted\n", current->comm); 320 while (atomic_read(&pctx->dma_sync) != 0) { 321 if (kthread_should_stop()) 322 break; 323 msleep(20); 324 } 325 } 326 327 kstop = ktime_get(); 328 kdiff = ktime_sub(kstop, kstart); 329 diff_us = ktime_to_us(kdiff); 330 331 pr_debug("%s: copied %llu bytes\n", current->comm, copied); 332 333 pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); 334 335 perf = div64_u64(copied, diff_us); 336 337 pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); 338 339 pctx->copied = copied; 340 pctx->diff_us = diff_us; 341 342 return 0; 343} 344 345static bool perf_dma_filter_fn(struct dma_chan *chan, void *node) 346{ 347 /* Is the channel required to be on the same node as the device? */ 348 if (!on_node) 349 return true; 350 351 return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; 352} 353 354static int ntb_perf_thread(void *data) 355{ 356 struct pthr_ctx *pctx = data; 357 struct perf_ctx *perf = pctx->perf; 358 struct pci_dev *pdev = perf->ntb->pdev; 359 struct perf_mw *mw = &perf->mw; 360 char __iomem *dst; 361 u64 win_size, buf_size, total; 362 void *src; 363 int rc, node, i; 364 struct dma_chan *dma_chan = NULL; 365 366 pr_debug("kthread %s starting...\n", current->comm); 367 368 node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE; 369 370 if (use_dma && !pctx->dma_chan) { 371 dma_cap_mask_t dma_mask; 372 373 dma_cap_zero(dma_mask); 374 dma_cap_set(DMA_MEMCPY, dma_mask); 375 dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn, 376 (void *)(unsigned long)node); 377 if (!dma_chan) { 378 pr_warn("%s: cannot acquire DMA channel, quitting\n", 379 current->comm); 380 return -ENODEV; 381 } 382 pctx->dma_chan = dma_chan; 383 } 384 385 for (i = 0; i < MAX_SRCS; i++) { 386 pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node); 387 if (!pctx->srcs[i]) { 388 rc = -ENOMEM; 389 goto err; 390 } 391 } 392 393 win_size = mw->phys_size; 394 buf_size = 1ULL << seg_order; 395 total = 1ULL << run_order; 396 397 if (buf_size > MAX_TEST_SIZE) 398 buf_size = MAX_TEST_SIZE; 399 400 dst = (char __iomem *)mw->vbase; 401 402 atomic_inc(&perf->tsync); 403 while (atomic_read(&perf->tsync) != perf->perf_threads) 404 schedule(); 405 406 src = pctx->srcs[pctx->src_idx]; 407 pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1); 408 409 rc = perf_move_data(pctx, dst, src, buf_size, win_size, total); 410 411 atomic_dec(&perf->tsync); 412 413 if (rc < 0) { 414 pr_err("%s: failed\n", current->comm); 415 rc = -ENXIO; 416 goto err; 417 } 418 419 for (i = 0; i < MAX_SRCS; i++) { 420 kfree(pctx->srcs[i]); 421 pctx->srcs[i] = NULL; 422 } 423 424 atomic_inc(&perf->tdone); 425 wake_up(pctx->wq); 426 rc = 0; 427 goto done; 428 429err: 430 for (i = 0; i < MAX_SRCS; i++) { 431 kfree(pctx->srcs[i]); 432 pctx->srcs[i] = NULL; 433 } 434 435 if (dma_chan) { 436 dma_release_channel(dma_chan); 437 pctx->dma_chan = NULL; 438 } 439 440done: 441 /* Wait until we are told to stop */ 442 for (;;) { 443 set_current_state(TASK_INTERRUPTIBLE); 444 if (kthread_should_stop()) 445 break; 446 schedule(); 447 } 448 __set_current_state(TASK_RUNNING); 449 450 return rc; 451} 452 453static void perf_free_mw(struct perf_ctx *perf) 454{ 455 struct perf_mw *mw = &perf->mw; 456 struct pci_dev *pdev = perf->ntb->pdev; 457 458 if (!mw->virt_addr) 459 return; 460 461 ntb_mw_clear_trans(perf->ntb, PIDX, 0); 462 dma_free_coherent(&pdev->dev, mw->buf_size, 463 mw->virt_addr, mw->dma_addr); 464 mw->xlat_size = 0; 465 mw->buf_size = 0; 466 mw->virt_addr = NULL; 467} 468 469static int perf_set_mw(struct perf_ctx *perf, resource_size_t size) 470{ 471 struct perf_mw *mw = &perf->mw; 472 size_t xlat_size, buf_size; 473 resource_size_t xlat_align; 474 resource_size_t xlat_align_size; 475 int rc; 476 477 if (!size) 478 return -EINVAL; 479 480 rc = ntb_mw_get_align(perf->ntb, PIDX, 0, &xlat_align, 481 &xlat_align_size, NULL); 482 if (rc) 483 return rc; 484 485 xlat_size = round_up(size, xlat_align_size); 486 buf_size = round_up(size, xlat_align); 487 488 if (mw->xlat_size == xlat_size) 489 return 0; 490 491 if (mw->buf_size) 492 perf_free_mw(perf); 493 494 mw->xlat_size = xlat_size; 495 mw->buf_size = buf_size; 496 497 mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size, 498 &mw->dma_addr, GFP_KERNEL); 499 if (!mw->virt_addr) { 500 mw->xlat_size = 0; 501 mw->buf_size = 0; 502 } 503 504 rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size); 505 if (rc) { 506 dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n"); 507 perf_free_mw(perf); 508 return -EIO; 509 } 510 511 return 0; 512} 513 514static void perf_link_work(struct work_struct *work) 515{ 516 struct perf_ctx *perf = 517 container_of(work, struct perf_ctx, link_work.work); 518 struct ntb_dev *ndev = perf->ntb; 519 struct pci_dev *pdev = ndev->pdev; 520 u32 val; 521 u64 size; 522 int rc; 523 524 dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); 525 526 size = perf->mw.phys_size; 527 528 if (max_mw_size && size > max_mw_size) 529 size = max_mw_size; 530 531 ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size)); 532 ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size)); 533 ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION); 534 535 /* now read what peer wrote */ 536 val = ntb_spad_read(ndev, VERSION); 537 if (val != PERF_VERSION) { 538 dev_dbg(&pdev->dev, "Remote version = %#x\n", val); 539 goto out; 540 } 541 542 val = ntb_spad_read(ndev, MW_SZ_HIGH); 543 size = (u64)val << 32; 544 545 val = ntb_spad_read(ndev, MW_SZ_LOW); 546 size |= val; 547 548 dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size); 549 550 rc = perf_set_mw(perf, size); 551 if (rc) 552 goto out1; 553 554 perf->link_is_up = true; 555 wake_up(&perf->link_wq); 556 557 return; 558 559out1: 560 perf_free_mw(perf); 561 562out: 563 if (ntb_link_is_up(ndev, NULL, NULL) == 1) 564 schedule_delayed_work(&perf->link_work, 565 msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); 566} 567 568static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) 569{ 570 struct perf_mw *mw; 571 int rc; 572 573 mw = &perf->mw; 574 575 rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size); 576 if (rc) 577 return rc; 578 579 perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size); 580 if (!mw->vbase) 581 return -ENOMEM; 582 583 return 0; 584} 585 586static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, 587 size_t count, loff_t *offp) 588{ 589 struct perf_ctx *perf = filp->private_data; 590 char *buf; 591 ssize_t ret, out_off = 0; 592 struct pthr_ctx *pctx; 593 int i; 594 u64 rate; 595 596 if (!perf) 597 return 0; 598 599 buf = kmalloc(1024, GFP_KERNEL); 600 if (!buf) 601 return -ENOMEM; 602 603 if (mutex_is_locked(&perf->run_mutex)) { 604 out_off = scnprintf(buf, 64, "running\n"); 605 goto read_from_buf; 606 } 607 608 for (i = 0; i < MAX_THREADS; i++) { 609 pctx = &perf->pthr_ctx[i]; 610 611 if (pctx->status == -ENODATA) 612 break; 613 614 if (pctx->status) { 615 out_off += scnprintf(buf + out_off, 1024 - out_off, 616 "%d: error %d\n", i, 617 pctx->status); 618 continue; 619 } 620 621 rate = div64_u64(pctx->copied, pctx->diff_us); 622 out_off += scnprintf(buf + out_off, 1024 - out_off, 623 "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", 624 i, pctx->copied, pctx->diff_us, rate); 625 } 626 627read_from_buf: 628 ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); 629 kfree(buf); 630 631 return ret; 632} 633 634static void threads_cleanup(struct perf_ctx *perf) 635{ 636 struct pthr_ctx *pctx; 637 int i; 638 639 for (i = 0; i < MAX_THREADS; i++) { 640 pctx = &perf->pthr_ctx[i]; 641 if (pctx->thread) { 642 pctx->status = kthread_stop(pctx->thread); 643 pctx->thread = NULL; 644 } 645 } 646} 647 648static void perf_clear_thread_status(struct perf_ctx *perf) 649{ 650 int i; 651 652 for (i = 0; i < MAX_THREADS; i++) 653 perf->pthr_ctx[i].status = -ENODATA; 654} 655 656static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, 657 size_t count, loff_t *offp) 658{ 659 struct perf_ctx *perf = filp->private_data; 660 int node, i; 661 DECLARE_WAIT_QUEUE_HEAD(wq); 662 663 if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) 664 return -ENOLINK; 665 666 if (perf->perf_threads == 0) 667 return -EINVAL; 668 669 if (!mutex_trylock(&perf->run_mutex)) 670 return -EBUSY; 671 672 perf_clear_thread_status(perf); 673 674 if (perf->perf_threads > MAX_THREADS) { 675 perf->perf_threads = MAX_THREADS; 676 pr_info("Reset total threads to: %u\n", MAX_THREADS); 677 } 678 679 /* no greater than 1M */ 680 if (seg_order > MAX_SEG_ORDER) { 681 seg_order = MAX_SEG_ORDER; 682 pr_info("Fix seg_order to %u\n", seg_order); 683 } 684 685 if (run_order < seg_order) { 686 run_order = seg_order; 687 pr_info("Fix run_order to %u\n", run_order); 688 } 689 690 node = on_node ? dev_to_node(&perf->ntb->pdev->dev) 691 : NUMA_NO_NODE; 692 atomic_set(&perf->tdone, 0); 693 694 /* launch kernel thread */ 695 for (i = 0; i < perf->perf_threads; i++) { 696 struct pthr_ctx *pctx; 697 698 pctx = &perf->pthr_ctx[i]; 699 atomic_set(&pctx->dma_sync, 0); 700 pctx->perf = perf; 701 pctx->wq = &wq; 702 pctx->thread = 703 kthread_create_on_node(ntb_perf_thread, 704 (void *)pctx, 705 node, "ntb_perf %d", i); 706 if (IS_ERR(pctx->thread)) { 707 pctx->thread = NULL; 708 goto err; 709 } else { 710 wake_up_process(pctx->thread); 711 } 712 } 713 714 wait_event_interruptible(wq, 715 atomic_read(&perf->tdone) == perf->perf_threads); 716 717 threads_cleanup(perf); 718 mutex_unlock(&perf->run_mutex); 719 return count; 720 721err: 722 threads_cleanup(perf); 723 mutex_unlock(&perf->run_mutex); 724 return -ENXIO; 725} 726 727static const struct file_operations ntb_perf_debugfs_run = { 728 .owner = THIS_MODULE, 729 .open = simple_open, 730 .read = debugfs_run_read, 731 .write = debugfs_run_write, 732}; 733 734static int perf_debugfs_setup(struct perf_ctx *perf) 735{ 736 struct pci_dev *pdev = perf->ntb->pdev; 737 struct dentry *debugfs_node_dir; 738 struct dentry *debugfs_run; 739 struct dentry *debugfs_threads; 740 struct dentry *debugfs_seg_order; 741 struct dentry *debugfs_run_order; 742 struct dentry *debugfs_use_dma; 743 struct dentry *debugfs_on_node; 744 745 if (!debugfs_initialized()) 746 return -ENODEV; 747 748 /* Assumpion: only one NTB device in the system */ 749 if (!perf_debugfs_dir) { 750 perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); 751 if (!perf_debugfs_dir) 752 return -ENODEV; 753 } 754 755 debugfs_node_dir = debugfs_create_dir(pci_name(pdev), 756 perf_debugfs_dir); 757 if (!debugfs_node_dir) 758 goto err; 759 760 debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR, 761 debugfs_node_dir, perf, 762 &ntb_perf_debugfs_run); 763 if (!debugfs_run) 764 goto err; 765 766 debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR, 767 debugfs_node_dir, 768 &perf->perf_threads); 769 if (!debugfs_threads) 770 goto err; 771 772 debugfs_seg_order = debugfs_create_u32("seg_order", 0600, 773 debugfs_node_dir, 774 &seg_order); 775 if (!debugfs_seg_order) 776 goto err; 777 778 debugfs_run_order = debugfs_create_u32("run_order", 0600, 779 debugfs_node_dir, 780 &run_order); 781 if (!debugfs_run_order) 782 goto err; 783 784 debugfs_use_dma = debugfs_create_bool("use_dma", 0600, 785 debugfs_node_dir, 786 &use_dma); 787 if (!debugfs_use_dma) 788 goto err; 789 790 debugfs_on_node = debugfs_create_bool("on_node", 0600, 791 debugfs_node_dir, 792 &on_node); 793 if (!debugfs_on_node) 794 goto err; 795 796 return 0; 797 798err: 799 debugfs_remove_recursive(perf_debugfs_dir); 800 perf_debugfs_dir = NULL; 801 return -ENODEV; 802} 803 804static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) 805{ 806 struct pci_dev *pdev = ntb->pdev; 807 struct perf_ctx *perf; 808 int node; 809 int rc = 0; 810 811 if (ntb_spad_count(ntb) < MAX_SPAD) { 812 dev_err(&ntb->dev, "Not enough scratch pad registers for %s", 813 DRIVER_NAME); 814 return -EIO; 815 } 816 817 if (!ntb->ops->mw_set_trans) { 818 dev_err(&ntb->dev, "Need inbound MW based NTB API\n"); 819 return -EINVAL; 820 } 821 822 if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT) 823 dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n"); 824 825 node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE; 826 perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); 827 if (!perf) { 828 rc = -ENOMEM; 829 goto err_perf; 830 } 831 832 perf->ntb = ntb; 833 perf->perf_threads = 1; 834 atomic_set(&perf->tsync, 0); 835 mutex_init(&perf->run_mutex); 836 spin_lock_init(&perf->db_lock); 837 perf_setup_mw(ntb, perf); 838 init_waitqueue_head(&perf->link_wq); 839 INIT_DELAYED_WORK(&perf->link_work, perf_link_work); 840 841 rc = ntb_set_ctx(ntb, perf, &perf_ops); 842 if (rc) 843 goto err_ctx; 844 845 perf->link_is_up = false; 846 ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); 847 ntb_link_event(ntb); 848 849 rc = perf_debugfs_setup(perf); 850 if (rc) 851 goto err_ctx; 852 853 perf_clear_thread_status(perf); 854 855 return 0; 856 857err_ctx: 858 cancel_delayed_work_sync(&perf->link_work); 859 kfree(perf); 860err_perf: 861 return rc; 862} 863 864static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) 865{ 866 struct perf_ctx *perf = ntb->ctx; 867 int i; 868 869 dev_dbg(&perf->ntb->dev, "%s called\n", __func__); 870 871 mutex_lock(&perf->run_mutex); 872 873 cancel_delayed_work_sync(&perf->link_work); 874 875 ntb_clear_ctx(ntb); 876 ntb_link_disable(ntb); 877 878 debugfs_remove_recursive(perf_debugfs_dir); 879 perf_debugfs_dir = NULL; 880 881 if (use_dma) { 882 for (i = 0; i < MAX_THREADS; i++) { 883 struct pthr_ctx *pctx = &perf->pthr_ctx[i]; 884 885 if (pctx->dma_chan) 886 dma_release_channel(pctx->dma_chan); 887 } 888 } 889 890 kfree(perf); 891} 892 893static struct ntb_client perf_client = { 894 .ops = { 895 .probe = perf_probe, 896 .remove = perf_remove, 897 }, 898}; 899module_ntb_client(perf_client);