/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
#define PIDX			NTB_DEF_PEER_IDX

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to false */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use the DMA engine to measure performance");

static bool on_node = true; /* default to true */
module_param(on_node, bool, 0644);
MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");

struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
	wait_queue_head_t	*wq;
	int			status;
	u64			copied;
	u64			diff_us;
};

struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct delayed_work	link_work;
	wait_queue_head_t	link_wq;
	u8			perf_threads;
	/* mutex ensures only one set of threads run at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
	atomic_t		tdone;
};

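/*
 * Scratchpad layout used for the link handshake: each side publishes its
 * protocol version and memory window size, then reads back what the peer
 * wrote.
 */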
enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	MAX_SPAD
};

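/*
 * Link event callback: on link-up, kick off the scratchpad handshake from
 * the workqueue after a short settling delay; on link-down, stop any
 * pending handshake and mark the link down.
 */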
static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
		schedule_delayed_work(&perf->link_work, 2*HZ);
	} else {
		dev_dbg(&perf->ntb->pdev->dev, "link down\n");

		if (!perf->link_is_up)
			cancel_delayed_work_sync(&perf->link_work);

		perf->link_is_up = false;
	}
}

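/* Doorbell event callback: this driver only logs the doorbell bits. */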
static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}

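/*
 * Copy one buffer segment into the peer's memory window, either with a CPU
 * memcpy_toio() or by submitting a memcpy descriptor to the DMA engine.
 * Returns the number of bytes queued, or a negative errno.
 */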
static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	/* Translate the ioremapped destination back to a bus address. */
	vbase = mw->vbase;
	dst_vaddr = dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	/* Retry for a while if the engine is out of descriptors. */
	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	/* Drop the descriptor's reference taken by dma_set_unmap(). */
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}

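/*
 * Stream 'total' bytes through the memory window in 'buf_size' chunks,
 * wrapping back to the start of the window whenever it fills up, and
 * record the bytes copied and elapsed time for the bandwidth report.
 */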
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0;
	ssize_t result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		/* Don't fold a negative errno into the byte count. */
		if (result < 0)
			break;
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return 0;
}

static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	/* Is the channel required to be on the same node as the device? */
	if (!on_node)
		return true;

	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

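/*
 * Per-thread worker: optionally grab a DMA channel on the right NUMA node,
 * allocate source buffers, wait for all sibling threads at the barrier,
 * run the transfer, then park until kthread_stop().
 */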
static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char __iomem *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_debug("kthread %s starting...\n", current->comm);

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char __iomem *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	atomic_inc(&perf->tdone);
	wake_up(pctx->wq);
	rc = 0;
	goto done;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

done:
	/* Wait until we are told to stop */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return rc;
}

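/* Tear down the inbound window: clear the translation and free the buffer. */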
static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

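/*
 * Size the inbound memory window to the peer's advertised size: round the
 * size up to the hardware alignment constraints, allocate a DMA-coherent
 * buffer, and program it as the window translation.
 */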
static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	resource_size_t xlat_align;
	resource_size_t xlat_align_size;
	int rc;

	if (!size)
		return -EINVAL;

	rc = ntb_mw_get_align(perf->ntb, PIDX, 0, &xlat_align,
			      &xlat_align_size, NULL);
	if (rc)
		return rc;

	xlat_size = round_up(size, xlat_align_size);
	buf_size = round_up(size, xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		/* Don't program a translation for a buffer we don't have. */
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}

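/*
 * Deferred link-up handshake: publish our protocol version and window size
 * through the peer's scratchpads, read back the peer's values, and size the
 * local window to match.  Reschedules itself while the peer hasn't yet
 * written a matching version.
 */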
static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;

	if (max_mw_size && size > max_mw_size)
		size = max_mw_size;

	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;
	wake_up(&perf->link_wq);

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

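/* Discover the peer's memory window and map it for CPU access. */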
static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}

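/*
 * debugfs "run" read: report "running" while a test is in flight, otherwise
 * print the per-thread byte counts, elapsed times and throughput.
 */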
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					     "%d: error %d\n", i,
					     pctx->status);
			continue;
		}

		rate = div64_u64(pctx->copied, pctx->diff_us);
		out_off += scnprintf(buf + out_off, 1024 - out_off,
				     "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
				     i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

static void threads_cleanup(struct perf_ctx *perf)
{
	struct pthr_ctx *pctx;
	int i;

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];
		if (pctx->thread) {
			pctx->status = kthread_stop(pctx->thread);
			pctx->thread = NULL;
		}
	}
}

static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int i;

	for (i = 0; i < MAX_THREADS; i++)
		perf->pthr_ctx[i].status = -ENODATA;
}

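/*
 * debugfs "run" write: validate the thread count and transfer orders, spawn
 * one kthread per requested thread, wait for them all to finish, and then
 * reap them.  Only one run may be in flight at a time.
 */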
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
		       : NUMA_NO_NODE;
	atomic_set(&perf->tdone, 0);

	/* launch kernel threads */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}

static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

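/*
 * Create the debugfs hierarchy:
 * ntb_perf/<pci device>/{run,threads,seg_order,run_order,use_dma,on_node}.
 */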
static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;
	struct dentry *debugfs_node_dir;
	struct dentry *debugfs_run;
	struct dentry *debugfs_threads;
	struct dentry *debugfs_seg_order;
	struct dentry *debugfs_run_order;
	struct dentry *debugfs_use_dma;
	struct dentry *debugfs_on_node;

	if (!debugfs_initialized())
		return -ENODEV;

	/* Assumption: only one NTB device in the system */
	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
					      perf_debugfs_dir);
	if (!debugfs_node_dir)
		goto err;

	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
					  debugfs_node_dir, perf,
					  &ntb_perf_debugfs_run);
	if (!debugfs_run)
		goto err;

	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
					    debugfs_node_dir,
					    &perf->perf_threads);
	if (!debugfs_threads)
		goto err;

	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
					       debugfs_node_dir,
					       &seg_order);
	if (!debugfs_seg_order)
		goto err;

	debugfs_run_order = debugfs_create_u32("run_order", 0600,
					       debugfs_node_dir,
					       &run_order);
	if (!debugfs_run_order)
		goto err;

	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
					      debugfs_node_dir,
					      &use_dma);
	if (!debugfs_use_dma)
		goto err;

	debugfs_on_node = debugfs_create_bool("on_node", 0600,
					      debugfs_node_dir,
					      &on_node);
	if (!debugfs_on_node)
		goto err;

	return 0;

err:
	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;
	return -ENODEV;
}

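/*
 * Probe: validate that the device exposes enough scratchpads and an inbound
 * memory window, set up the context and debugfs files, then enable the link.
 */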
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	if (ntb_spad_count(ntb) < MAX_SPAD) {
		dev_err(&ntb->dev, "Not enough scratch pad registers for %s\n",
			DRIVER_NAME);
		return -EIO;
	}

	if (!ntb->ops->mw_set_trans) {
		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
		return -EINVAL;
	}

	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	mutex_init(&perf->run_mutex);
	spin_lock_init(&perf->db_lock);
	init_waitqueue_head(&perf->link_wq);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);

	/* Map the peer's memory window; without it there is nothing to test. */
	rc = perf_setup_mw(ntb, perf);
	if (rc)
		goto err_ctx;

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	perf_clear_thread_status(perf);

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	kfree(perf);
err_perf:
	return rc;
}

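/* Remove: stop the link, release any DMA channels and free the context. */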
static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	/* Taken and never released, so no new run can start during teardown. */
	mutex_lock(&perf->run_mutex);

	cancel_delayed_work_sync(&perf->link_work);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);