Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.2-rc1 1512 lines 38 kB view raw
/*
 *   This file is provided under a dual BSD/GPLv2 license.  When using or
 *   redistributing this file, you may do so under either license.
 *
 *   GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copy
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

/*
 * How to use this tool, by example.
 *
 * Assuming $DBG_DIR is something like:
 * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
 * Suppose aside from local device there is at least one remote device
 * connected to NTB with index 0.
 *-----------------------------------------------------------------------------
 * Eg: install driver with specified chunk/total orders and dma-enabled flag
 *
 * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
 *-----------------------------------------------------------------------------
 * Eg: check NTB ports (index) and MW mapping information
 *
 * root@self# cat $DBG_DIR/info
 *-----------------------------------------------------------------------------
 * Eg: start performance test with peer (index 0) and get the test metrics
 *
 * root@self# echo 0 > $DBG_DIR/run
 * root@self# cat $DBG_DIR/run
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/pci.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/random.h>
#include <linux/ntb.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_VERSION		"2.0"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");

/* Size of the per-context thread array and the default threads count */
#define MAX_THREADS_CNT		32
#define DEF_THREADS_CNT		1
#define MAX_CHUNK_SIZE		SZ_1M
#define MAX_CHUNK_ORDER		20 /* no larger than 1M */

/* DMA descriptor preparation: attempts count and delay between them (ms) */
#define DMA_TRIES		100
#define DMA_MDELAY		10

/* Command sending: attempts count and sleep range between them (us) */
#define MSG_TRIES		500
#define MSG_UDELAY_LOW		1000
#define MSG_UDELAY_HIGH		2000

/* Size of the buffer used to print the test statistics */
#define PERF_BUF_LEN 1024

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");

static unsigned char chunk_order = 19; /* 512K */
module_param(chunk_order, byte, 0644);
MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");

static unsigned char total_order = 30; /* 1G */
module_param(total_order, byte, 0644);
MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

/*==============================================================================
 * Perf driver data definition
 *==============================================================================
 */

/*
 * Commands exchanged with a peer. The non-negative values are also used
 * as bit numbers within the perf_peer.sts field.
 */
enum perf_cmd {
	PERF_CMD_INVAL = -1,/* invalid spad command */
	PERF_CMD_SSIZE = 0, /* send out buffer size */
	PERF_CMD_RSIZE = 1, /* recv in buffer size */
	PERF_CMD_SXLAT = 2, /* send in buffer xlat */
	PERF_CMD_RXLAT = 3, /* recv out buffer xlat */
	PERF_CMD_CLEAR = 4, /* clear allocated memory */
	PERF_STS_DONE  = 5, /* init is done */
	PERF_STS_LNKUP = 6, /* link up state flag */
};

struct perf_ctx;

/*
 * Peer descriptor: indexes of the peer, its outbound/inbound memory
 * window parameters and the work item of the connection setup service.
 */
struct perf_peer {
	struct perf_ctx	*perf;
	int pidx;
	int gidx;

	/* Outbound MW params */
	u64 outbuf_xlat;
	resource_size_t outbuf_size;
	void __iomem *outbuf;

	/* Inbound MW params */
	dma_addr_t inbuf_xlat;
	resource_size_t inbuf_size;
	void		*inbuf;

	/* NTB connection setup service */
	struct work_struct	service;
	unsigned long		sts;
};
/* Get the peer descriptor from its embedded service work item */
#define to_peer_service(__work) \
	container_of(__work, struct perf_peer, service)

/*
 * Test thread descriptor: DMA synchronization objects, the data source
 * buffer and the statistics collected by the thread work.
 */
struct perf_thread {
	struct perf_ctx *perf;
	int tidx;

	/* DMA-based test sync parameters */
	atomic_t dma_sync;
	wait_queue_head_t dma_wait;
	struct dma_chan *dma_chan;

	/* Data source and measured statistics */
	void *src;
	u64 copied;
	ktime_t duration;
	int status;
	struct work_struct work;
};
/* Get the thread descriptor from its embedded work item */
#define to_thread_work(__work) \
	container_of(__work, struct perf_thread, work)

/*
 * Driver context of a single NTB device: peers, test threads and
 * the command transport callbacks.
 */
struct perf_ctx {
	struct ntb_dev *ntb;

	/* Global device index and peers descriptors */
	int gidx;
	int pcnt;
	struct perf_peer *peers;

	/* Performance measuring work-threads interface */
	unsigned long busy_flag;
	wait_queue_head_t twait;
	atomic_t tsync;
	u8 tcnt;
	struct perf_peer *test_peer;
	struct perf_thread threads[MAX_THREADS_CNT];

	/* Scratchpad/Message IO operations */
	int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
	int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
			u64 *data);

	struct dentry *dbgfs_dir;
};

/*
 * Scratchpads-base commands interface
 *
 * Three scratchpads (command, low and high data words) are used per
 * global device index.
 */
#define PERF_SPAD_CNT(_pcnt) \
	(3*((_pcnt) + 1))
#define PERF_SPAD_CMD(_gidx) \
	(3*(_gidx))
#define PERF_SPAD_LDATA(_gidx) \
	(3*(_gidx) + 1)
#define PERF_SPAD_HDATA(_gidx) \
	(3*(_gidx) + 2)
#define PERF_SPAD_NOTIFY(_gidx) \
	(BIT_ULL(_gidx))

/*
 * Messages-base commands interface
 */
#define PERF_MSG_CNT		3
#define PERF_MSG_CMD		0
#define PERF_MSG_LDATA		1
#define PERF_MSG_HDATA		2

230/*============================================================================== 231 * Static data declarations 232 *============================================================================== 233 */ 234 235static struct dentry *perf_dbgfs_topdir; 236 237static struct workqueue_struct *perf_wq __read_mostly; 238 239/*============================================================================== 240 * NTB cross-link commands execution service 241 *============================================================================== 242 */ 243 244static void perf_terminate_test(struct perf_ctx *perf); 245 246static inline bool perf_link_is_up(struct perf_peer *peer) 247{ 248 u64 link; 249 250 link = ntb_link_is_up(peer->perf->ntb, NULL, NULL); 251 return !!(link & BIT_ULL_MASK(peer->pidx)); 252} 253 254static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, 255 u64 data) 256{ 257 struct perf_ctx *perf = peer->perf; 258 int try; 259 u32 sts; 260 261 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); 262 263 /* 264 * Perform predefined number of attempts before give up. 265 * We are sending the data to the port specific scratchpad, so 266 * to prevent a multi-port access race-condition. Additionally 267 * there is no need in local locking since only thread-safe 268 * service work is using this method. 
269 */ 270 for (try = 0; try < MSG_TRIES; try++) { 271 if (!perf_link_is_up(peer)) 272 return -ENOLINK; 273 274 sts = ntb_peer_spad_read(perf->ntb, peer->pidx, 275 PERF_SPAD_CMD(perf->gidx)); 276 if (sts != PERF_CMD_INVAL) { 277 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); 278 continue; 279 } 280 281 ntb_peer_spad_write(perf->ntb, peer->pidx, 282 PERF_SPAD_LDATA(perf->gidx), 283 lower_32_bits(data)); 284 ntb_peer_spad_write(perf->ntb, peer->pidx, 285 PERF_SPAD_HDATA(perf->gidx), 286 upper_32_bits(data)); 287 ntb_peer_spad_write(perf->ntb, peer->pidx, 288 PERF_SPAD_CMD(perf->gidx), 289 cmd); 290 ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); 291 292 dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", 293 PERF_SPAD_NOTIFY(peer->gidx)); 294 295 break; 296 } 297 298 return try < MSG_TRIES ? 0 : -EAGAIN; 299} 300 301static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, 302 enum perf_cmd *cmd, u64 *data) 303{ 304 struct perf_peer *peer; 305 u32 val; 306 307 ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); 308 309 /* 310 * We start scanning all over, since cleared DB may have been set 311 * by any peer. Yes, it makes peer with smaller index being 312 * serviced with greater priority, but it's convenient for spad 313 * and message code unification and simplicity. 
314 */ 315 for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) { 316 peer = &perf->peers[*pidx]; 317 318 if (!perf_link_is_up(peer)) 319 continue; 320 321 val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx)); 322 if (val == PERF_CMD_INVAL) 323 continue; 324 325 *cmd = val; 326 327 val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx)); 328 *data = val; 329 330 val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx)); 331 *data |= (u64)val << 32; 332 333 /* Next command can be retrieved from now */ 334 ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 335 PERF_CMD_INVAL); 336 337 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); 338 339 return 0; 340 } 341 342 return -ENODATA; 343} 344 345static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, 346 u64 data) 347{ 348 struct perf_ctx *perf = peer->perf; 349 int try, ret; 350 u64 outbits; 351 352 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); 353 354 /* 355 * Perform predefined number of attempts before give up. Message 356 * registers are free of race-condition problem when accessed 357 * from different ports, so we don't need splitting registers 358 * by global device index. We also won't have local locking, 359 * since the method is used from service work only. 360 */ 361 outbits = ntb_msg_outbits(perf->ntb); 362 for (try = 0; try < MSG_TRIES; try++) { 363 if (!perf_link_is_up(peer)) 364 return -ENOLINK; 365 366 ret = ntb_msg_clear_sts(perf->ntb, outbits); 367 if (ret) 368 return ret; 369 370 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA, 371 lower_32_bits(data)); 372 373 if (ntb_msg_read_sts(perf->ntb) & outbits) { 374 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); 375 continue; 376 } 377 378 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, 379 upper_32_bits(data)); 380 381 /* This call shall trigger peer message event */ 382 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd); 383 384 break; 385 } 386 387 return try < MSG_TRIES ? 
0 : -EAGAIN; 388} 389 390static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, 391 enum perf_cmd *cmd, u64 *data) 392{ 393 u64 inbits; 394 u32 val; 395 396 inbits = ntb_msg_inbits(perf->ntb); 397 398 if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3) 399 return -ENODATA; 400 401 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD); 402 *cmd = val; 403 404 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA); 405 *data = val; 406 407 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA); 408 *data |= (u64)val << 32; 409 410 /* Next command can be retrieved from now */ 411 ntb_msg_clear_sts(perf->ntb, inbits); 412 413 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); 414 415 return 0; 416} 417 418static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data) 419{ 420 struct perf_ctx *perf = peer->perf; 421 422 if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT) 423 return perf->cmd_send(peer, cmd, data); 424 425 dev_err(&perf->ntb->dev, "Send invalid command\n"); 426 return -EINVAL; 427} 428 429static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) 430{ 431 switch (cmd) { 432 case PERF_CMD_SSIZE: 433 case PERF_CMD_RSIZE: 434 case PERF_CMD_SXLAT: 435 case PERF_CMD_RXLAT: 436 case PERF_CMD_CLEAR: 437 break; 438 default: 439 dev_err(&peer->perf->ntb->dev, "Exec invalid command\n"); 440 return -EINVAL; 441 } 442 443 /* No need of memory barrier, since bit ops have invernal lock */ 444 set_bit(cmd, &peer->sts); 445 446 dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd); 447 448 (void)queue_work(system_highpri_wq, &peer->service); 449 450 return 0; 451} 452 453static int perf_cmd_recv(struct perf_ctx *perf) 454{ 455 struct perf_peer *peer; 456 int ret, pidx, cmd; 457 u64 data; 458 459 while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) { 460 peer = &perf->peers[pidx]; 461 462 switch (cmd) { 463 case PERF_CMD_SSIZE: 464 peer->inbuf_size = data; 465 return perf_cmd_exec(peer, PERF_CMD_RSIZE); 466 case 
PERF_CMD_SXLAT: 467 peer->outbuf_xlat = data; 468 return perf_cmd_exec(peer, PERF_CMD_RXLAT); 469 default: 470 dev_err(&perf->ntb->dev, "Recv invalid command\n"); 471 return -EINVAL; 472 } 473 } 474 475 /* Return 0 if no data left to process, otherwise an error */ 476 return ret == -ENODATA ? 0 : ret; 477} 478 479static void perf_link_event(void *ctx) 480{ 481 struct perf_ctx *perf = ctx; 482 struct perf_peer *peer; 483 bool lnk_up; 484 int pidx; 485 486 for (pidx = 0; pidx < perf->pcnt; pidx++) { 487 peer = &perf->peers[pidx]; 488 489 lnk_up = perf_link_is_up(peer); 490 491 if (lnk_up && 492 !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) { 493 perf_cmd_exec(peer, PERF_CMD_SSIZE); 494 } else if (!lnk_up && 495 test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) { 496 perf_cmd_exec(peer, PERF_CMD_CLEAR); 497 } 498 } 499} 500 501static void perf_db_event(void *ctx, int vec) 502{ 503 struct perf_ctx *perf = ctx; 504 505 dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec, 506 ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb)); 507 508 /* Just receive all available commands */ 509 (void)perf_cmd_recv(perf); 510} 511 512static void perf_msg_event(void *ctx) 513{ 514 struct perf_ctx *perf = ctx; 515 516 dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n", 517 ntb_msg_read_sts(perf->ntb)); 518 519 /* Messages are only sent one-by-one */ 520 (void)perf_cmd_recv(perf); 521} 522 523static const struct ntb_ctx_ops perf_ops = { 524 .link_event = perf_link_event, 525 .db_event = perf_db_event, 526 .msg_event = perf_msg_event 527}; 528 529static void perf_free_outbuf(struct perf_peer *peer) 530{ 531 (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); 532} 533 534static int perf_setup_outbuf(struct perf_peer *peer) 535{ 536 struct perf_ctx *perf = peer->perf; 537 int ret; 538 539 /* Outbuf size can be unaligned due to custom max_mw_size */ 540 ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, 541 peer->outbuf_xlat, 
peer->outbuf_size); 542 if (ret) { 543 dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n"); 544 return ret; 545 } 546 547 /* Initialization is finally done */ 548 set_bit(PERF_STS_DONE, &peer->sts); 549 550 return 0; 551} 552 553static void perf_free_inbuf(struct perf_peer *peer) 554{ 555 if (!peer->inbuf) 556 return; 557 558 (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); 559 dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size, 560 peer->inbuf, peer->inbuf_xlat); 561 peer->inbuf = NULL; 562} 563 564static int perf_setup_inbuf(struct perf_peer *peer) 565{ 566 resource_size_t xlat_align, size_align, size_max; 567 struct perf_ctx *perf = peer->perf; 568 int ret; 569 570 /* Get inbound MW parameters */ 571 ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx, 572 &xlat_align, &size_align, &size_max); 573 if (ret) { 574 dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n"); 575 return ret; 576 } 577 578 if (peer->inbuf_size > size_max) { 579 dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n", 580 &peer->inbuf_size, &size_max); 581 return -EINVAL; 582 } 583 584 peer->inbuf_size = round_up(peer->inbuf_size, size_align); 585 586 perf_free_inbuf(peer); 587 588 peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size, 589 &peer->inbuf_xlat, GFP_KERNEL); 590 if (!peer->inbuf) { 591 dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", 592 &peer->inbuf_size); 593 return -ENOMEM; 594 } 595 if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { 596 dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); 597 goto err_free_inbuf; 598 } 599 600 ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, 601 peer->inbuf_xlat, peer->inbuf_size); 602 if (ret) { 603 dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n"); 604 goto err_free_inbuf; 605 } 606 607 /* 608 * We submit inbuf xlat transmission cmd for execution here to follow 609 * the code architecture, even though this method is called from service 610 * 
work itself so the command will be executed right after it returns. 611 */ 612 (void)perf_cmd_exec(peer, PERF_CMD_SXLAT); 613 614 return 0; 615 616err_free_inbuf: 617 perf_free_inbuf(peer); 618 619 return ret; 620} 621 622static void perf_service_work(struct work_struct *work) 623{ 624 struct perf_peer *peer = to_peer_service(work); 625 626 if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts)) 627 perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size); 628 629 if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts)) 630 perf_setup_inbuf(peer); 631 632 if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts)) 633 perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat); 634 635 if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts)) 636 perf_setup_outbuf(peer); 637 638 if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { 639 clear_bit(PERF_STS_DONE, &peer->sts); 640 if (test_bit(0, &peer->perf->busy_flag) && 641 peer == peer->perf->test_peer) { 642 dev_warn(&peer->perf->ntb->dev, 643 "Freeing while test on-fly\n"); 644 perf_terminate_test(peer->perf); 645 } 646 perf_free_outbuf(peer); 647 perf_free_inbuf(peer); 648 } 649} 650 651static int perf_init_service(struct perf_ctx *perf) 652{ 653 u64 mask; 654 655 if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) { 656 dev_err(&perf->ntb->dev, "Not enough memory windows\n"); 657 return -EINVAL; 658 } 659 660 if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) { 661 perf->cmd_send = perf_msg_cmd_send; 662 perf->cmd_recv = perf_msg_cmd_recv; 663 664 dev_dbg(&perf->ntb->dev, "Message service initialized\n"); 665 666 return 0; 667 } 668 669 dev_dbg(&perf->ntb->dev, "Message service unsupported\n"); 670 671 mask = GENMASK_ULL(perf->pcnt, 0); 672 if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) && 673 (ntb_db_valid_mask(perf->ntb) & mask) == mask) { 674 perf->cmd_send = perf_spad_cmd_send; 675 perf->cmd_recv = perf_spad_cmd_recv; 676 677 dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n"); 678 679 return 0; 680 } 681 682 
dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n"); 683 684 dev_err(&perf->ntb->dev, "Command services unsupported\n"); 685 686 return -EINVAL; 687} 688 689static int perf_enable_service(struct perf_ctx *perf) 690{ 691 u64 mask, incmd_bit; 692 int ret, sidx, scnt; 693 694 mask = ntb_db_valid_mask(perf->ntb); 695 (void)ntb_db_set_mask(perf->ntb, mask); 696 697 ret = ntb_set_ctx(perf->ntb, perf, &perf_ops); 698 if (ret) 699 return ret; 700 701 if (perf->cmd_send == perf_msg_cmd_send) { 702 u64 inbits, outbits; 703 704 inbits = ntb_msg_inbits(perf->ntb); 705 outbits = ntb_msg_outbits(perf->ntb); 706 (void)ntb_msg_set_mask(perf->ntb, inbits | outbits); 707 708 incmd_bit = BIT_ULL(__ffs64(inbits)); 709 ret = ntb_msg_clear_mask(perf->ntb, incmd_bit); 710 711 dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit); 712 } else { 713 scnt = ntb_spad_count(perf->ntb); 714 for (sidx = 0; sidx < scnt; sidx++) 715 ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL); 716 incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); 717 ret = ntb_db_clear_mask(perf->ntb, incmd_bit); 718 719 dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit); 720 } 721 if (ret) { 722 ntb_clear_ctx(perf->ntb); 723 return ret; 724 } 725 726 ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); 727 /* Might be not necessary */ 728 ntb_link_event(perf->ntb); 729 730 return 0; 731} 732 733static void perf_disable_service(struct perf_ctx *perf) 734{ 735 int pidx; 736 737 ntb_link_disable(perf->ntb); 738 739 if (perf->cmd_send == perf_msg_cmd_send) { 740 u64 inbits; 741 742 inbits = ntb_msg_inbits(perf->ntb); 743 (void)ntb_msg_set_mask(perf->ntb, inbits); 744 } else { 745 (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); 746 } 747 748 ntb_clear_ctx(perf->ntb); 749 750 for (pidx = 0; pidx < perf->pcnt; pidx++) 751 perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR); 752 753 for (pidx = 0; pidx < perf->pcnt; pidx++) 754 flush_work(&perf->peers[pidx].service); 755} 756 
757/*============================================================================== 758 * Performance measuring work-thread 759 *============================================================================== 760 */ 761 762static void perf_dma_copy_callback(void *data) 763{ 764 struct perf_thread *pthr = data; 765 766 atomic_dec(&pthr->dma_sync); 767 wake_up(&pthr->dma_wait); 768} 769 770static int perf_copy_chunk(struct perf_thread *pthr, 771 void __iomem *dst, void *src, size_t len) 772{ 773 struct dma_async_tx_descriptor *tx; 774 struct dmaengine_unmap_data *unmap; 775 struct device *dma_dev; 776 int try = 0, ret = 0; 777 778 if (!use_dma) { 779 memcpy_toio(dst, src, len); 780 goto ret_check_tsync; 781 } 782 783 dma_dev = pthr->dma_chan->device->dev; 784 785 if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src), 786 offset_in_page(dst), len)) 787 return -EIO; 788 789 unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT); 790 if (!unmap) 791 return -ENOMEM; 792 793 unmap->len = len; 794 unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src), 795 offset_in_page(src), len, DMA_TO_DEVICE); 796 if (dma_mapping_error(dma_dev, unmap->addr[0])) { 797 ret = -EIO; 798 goto err_free_resource; 799 } 800 unmap->to_cnt = 1; 801 802 unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst), 803 offset_in_page(dst), len, DMA_FROM_DEVICE); 804 if (dma_mapping_error(dma_dev, unmap->addr[1])) { 805 ret = -EIO; 806 goto err_free_resource; 807 } 808 unmap->from_cnt = 1; 809 810 do { 811 tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1], 812 unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 813 if (!tx) 814 msleep(DMA_MDELAY); 815 } while (!tx && (try++ < DMA_TRIES)); 816 817 if (!tx) { 818 ret = -EIO; 819 goto err_free_resource; 820 } 821 822 tx->callback = perf_dma_copy_callback; 823 tx->callback_param = pthr; 824 dma_set_unmap(tx, unmap); 825 826 ret = dma_submit_error(dmaengine_submit(tx)); 827 if (ret) { 828 dmaengine_unmap_put(unmap); 829 
goto err_free_resource; 830 } 831 832 dmaengine_unmap_put(unmap); 833 834 atomic_inc(&pthr->dma_sync); 835 dma_async_issue_pending(pthr->dma_chan); 836 837ret_check_tsync: 838 return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR; 839 840err_free_resource: 841 dmaengine_unmap_put(unmap); 842 843 return ret; 844} 845 846static bool perf_dma_filter(struct dma_chan *chan, void *data) 847{ 848 struct perf_ctx *perf = data; 849 int node; 850 851 node = dev_to_node(&perf->ntb->dev); 852 853 return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev); 854} 855 856static int perf_init_test(struct perf_thread *pthr) 857{ 858 struct perf_ctx *perf = pthr->perf; 859 dma_cap_mask_t dma_mask; 860 861 pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL, 862 dev_to_node(&perf->ntb->dev)); 863 if (!pthr->src) 864 return -ENOMEM; 865 866 get_random_bytes(pthr->src, perf->test_peer->outbuf_size); 867 868 if (!use_dma) 869 return 0; 870 871 dma_cap_zero(dma_mask); 872 dma_cap_set(DMA_MEMCPY, dma_mask); 873 pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf); 874 if (!pthr->dma_chan) { 875 dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n", 876 pthr->tidx); 877 atomic_dec(&perf->tsync); 878 wake_up(&perf->twait); 879 kfree(pthr->src); 880 return -ENODEV; 881 } 882 883 atomic_set(&pthr->dma_sync, 0); 884 885 return 0; 886} 887 888static int perf_run_test(struct perf_thread *pthr) 889{ 890 struct perf_peer *peer = pthr->perf->test_peer; 891 struct perf_ctx *perf = pthr->perf; 892 void __iomem *flt_dst, *bnd_dst; 893 u64 total_size, chunk_size; 894 void *flt_src; 895 int ret = 0; 896 897 total_size = 1ULL << total_order; 898 chunk_size = 1ULL << chunk_order; 899 chunk_size = min_t(u64, peer->outbuf_size, chunk_size); 900 901 flt_src = pthr->src; 902 bnd_dst = peer->outbuf + peer->outbuf_size; 903 flt_dst = peer->outbuf; 904 905 pthr->duration = ktime_get(); 906 907 /* Copied field is cleared on test launch stage */ 908 while 
(pthr->copied < total_size) { 909 ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size); 910 if (ret) { 911 dev_err(&perf->ntb->dev, "%d: Got error %d on test\n", 912 pthr->tidx, ret); 913 return ret; 914 } 915 916 pthr->copied += chunk_size; 917 918 flt_dst += chunk_size; 919 flt_src += chunk_size; 920 if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) { 921 flt_dst = peer->outbuf; 922 flt_src = pthr->src; 923 } 924 925 /* Give up CPU to give a chance for other threads to use it */ 926 schedule(); 927 } 928 929 return 0; 930} 931 932static int perf_sync_test(struct perf_thread *pthr) 933{ 934 struct perf_ctx *perf = pthr->perf; 935 936 if (!use_dma) 937 goto no_dma_ret; 938 939 wait_event(pthr->dma_wait, 940 (atomic_read(&pthr->dma_sync) == 0 || 941 atomic_read(&perf->tsync) < 0)); 942 943 if (atomic_read(&perf->tsync) < 0) 944 return -EINTR; 945 946no_dma_ret: 947 pthr->duration = ktime_sub(ktime_get(), pthr->duration); 948 949 dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n", 950 pthr->tidx, pthr->copied); 951 952 dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n", 953 pthr->tidx, ktime_to_us(pthr->duration)); 954 955 dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx, 956 div64_u64(pthr->copied, ktime_to_us(pthr->duration))); 957 958 return 0; 959} 960 961static void perf_clear_test(struct perf_thread *pthr) 962{ 963 struct perf_ctx *perf = pthr->perf; 964 965 if (!use_dma) 966 goto no_dma_notify; 967 968 /* 969 * If test finished without errors, termination isn't needed. 970 * We call it anyway just to be sure of the transfers completion. 971 */ 972 (void)dmaengine_terminate_sync(pthr->dma_chan); 973 974 dma_release_channel(pthr->dma_chan); 975 976no_dma_notify: 977 atomic_dec(&perf->tsync); 978 wake_up(&perf->twait); 979 kfree(pthr->src); 980} 981 982static void perf_thread_work(struct work_struct *work) 983{ 984 struct perf_thread *pthr = to_thread_work(work); 985 int ret; 986 987 /* 988 * Perform stages in compliance with use_dma flag value. 
989 * Test status is changed only if error happened, otherwise 990 * status -ENODATA is kept while test is on-fly. Results 991 * synchronization is performed only if test fininshed 992 * without an error or interruption. 993 */ 994 ret = perf_init_test(pthr); 995 if (ret) { 996 pthr->status = ret; 997 return; 998 } 999 1000 ret = perf_run_test(pthr); 1001 if (ret) { 1002 pthr->status = ret; 1003 goto err_clear_test; 1004 } 1005 1006 pthr->status = perf_sync_test(pthr); 1007 1008err_clear_test: 1009 perf_clear_test(pthr); 1010} 1011 1012static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt) 1013{ 1014 if (tcnt == 0 || tcnt > MAX_THREADS_CNT) 1015 return -EINVAL; 1016 1017 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1018 return -EBUSY; 1019 1020 perf->tcnt = tcnt; 1021 1022 clear_bit_unlock(0, &perf->busy_flag); 1023 1024 return 0; 1025} 1026 1027static void perf_terminate_test(struct perf_ctx *perf) 1028{ 1029 int tidx; 1030 1031 atomic_set(&perf->tsync, -1); 1032 wake_up(&perf->twait); 1033 1034 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1035 wake_up(&perf->threads[tidx].dma_wait); 1036 cancel_work_sync(&perf->threads[tidx].work); 1037 } 1038} 1039 1040static int perf_submit_test(struct perf_peer *peer) 1041{ 1042 struct perf_ctx *perf = peer->perf; 1043 struct perf_thread *pthr; 1044 int tidx, ret; 1045 1046 if (!test_bit(PERF_STS_DONE, &peer->sts)) 1047 return -ENOLINK; 1048 1049 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1050 return -EBUSY; 1051 1052 perf->test_peer = peer; 1053 atomic_set(&perf->tsync, perf->tcnt); 1054 1055 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1056 pthr = &perf->threads[tidx]; 1057 1058 pthr->status = -ENODATA; 1059 pthr->copied = 0; 1060 pthr->duration = ktime_set(0, 0); 1061 if (tidx < perf->tcnt) 1062 (void)queue_work(perf_wq, &pthr->work); 1063 } 1064 1065 ret = wait_event_interruptible(perf->twait, 1066 atomic_read(&perf->tsync) <= 0); 1067 if (ret == -ERESTARTSYS) { 1068 perf_terminate_test(perf); 1069 ret = 
-EINTR; 1070 } 1071 1072 clear_bit_unlock(0, &perf->busy_flag); 1073 1074 return ret; 1075} 1076 1077static int perf_read_stats(struct perf_ctx *perf, char *buf, 1078 size_t size, ssize_t *pos) 1079{ 1080 struct perf_thread *pthr; 1081 int tidx; 1082 1083 if (test_and_set_bit_lock(0, &perf->busy_flag)) 1084 return -EBUSY; 1085 1086 (*pos) += scnprintf(buf + *pos, size - *pos, 1087 " Peer %d test statistics:\n", perf->test_peer->pidx); 1088 1089 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1090 pthr = &perf->threads[tidx]; 1091 1092 if (pthr->status == -ENODATA) 1093 continue; 1094 1095 if (pthr->status) { 1096 (*pos) += scnprintf(buf + *pos, size - *pos, 1097 "%d: error status %d\n", tidx, pthr->status); 1098 continue; 1099 } 1100 1101 (*pos) += scnprintf(buf + *pos, size - *pos, 1102 "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", 1103 tidx, pthr->copied, ktime_to_us(pthr->duration), 1104 div64_u64(pthr->copied, ktime_to_us(pthr->duration))); 1105 } 1106 1107 clear_bit_unlock(0, &perf->busy_flag); 1108 1109 return 0; 1110} 1111 1112static void perf_init_threads(struct perf_ctx *perf) 1113{ 1114 struct perf_thread *pthr; 1115 int tidx; 1116 1117 perf->tcnt = DEF_THREADS_CNT; 1118 perf->test_peer = &perf->peers[0]; 1119 init_waitqueue_head(&perf->twait); 1120 1121 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { 1122 pthr = &perf->threads[tidx]; 1123 1124 pthr->perf = perf; 1125 pthr->tidx = tidx; 1126 pthr->status = -ENODATA; 1127 init_waitqueue_head(&pthr->dma_wait); 1128 INIT_WORK(&pthr->work, perf_thread_work); 1129 } 1130} 1131 1132static void perf_clear_threads(struct perf_ctx *perf) 1133{ 1134 perf_terminate_test(perf); 1135} 1136 1137/*============================================================================== 1138 * DebugFS nodes 1139 *============================================================================== 1140 */ 1141 1142static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, 1143 size_t size, loff_t *offp) 1144{ 
1145 struct perf_ctx *perf = filep->private_data; 1146 struct perf_peer *peer; 1147 size_t buf_size; 1148 ssize_t pos = 0; 1149 int ret, pidx; 1150 char *buf; 1151 1152 buf_size = min_t(size_t, size, 0x1000U); 1153 1154 buf = kmalloc(buf_size, GFP_KERNEL); 1155 if (!buf) 1156 return -ENOMEM; 1157 1158 pos += scnprintf(buf + pos, buf_size - pos, 1159 " Performance measuring tool info:\n\n"); 1160 1161 pos += scnprintf(buf + pos, buf_size - pos, 1162 "Local port %d, Global index %d\n", ntb_port_number(perf->ntb), 1163 perf->gidx); 1164 pos += scnprintf(buf + pos, buf_size - pos, "Test status: "); 1165 if (test_bit(0, &perf->busy_flag)) { 1166 pos += scnprintf(buf + pos, buf_size - pos, 1167 "on-fly with port %d (%d)\n", 1168 ntb_peer_port_number(perf->ntb, perf->test_peer->pidx), 1169 perf->test_peer->pidx); 1170 } else { 1171 pos += scnprintf(buf + pos, buf_size - pos, "idle\n"); 1172 } 1173 1174 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1175 peer = &perf->peers[pidx]; 1176 1177 pos += scnprintf(buf + pos, buf_size - pos, 1178 "Port %d (%d), Global index %d:\n", 1179 ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx, 1180 peer->gidx); 1181 1182 pos += scnprintf(buf + pos, buf_size - pos, 1183 "\tLink status: %s\n", 1184 test_bit(PERF_STS_LNKUP, &peer->sts) ? 
"up" : "down"); 1185 1186 pos += scnprintf(buf + pos, buf_size - pos, 1187 "\tOut buffer addr 0x%pK\n", peer->outbuf); 1188 1189 pos += scnprintf(buf + pos, buf_size - pos, 1190 "\tOut buffer size %pa\n", &peer->outbuf_size); 1191 1192 pos += scnprintf(buf + pos, buf_size - pos, 1193 "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat); 1194 1195 if (!peer->inbuf) { 1196 pos += scnprintf(buf + pos, buf_size - pos, 1197 "\tIn buffer addr: unallocated\n"); 1198 continue; 1199 } 1200 1201 pos += scnprintf(buf + pos, buf_size - pos, 1202 "\tIn buffer addr 0x%pK\n", peer->inbuf); 1203 1204 pos += scnprintf(buf + pos, buf_size - pos, 1205 "\tIn buffer size %pa\n", &peer->inbuf_size); 1206 1207 pos += scnprintf(buf + pos, buf_size - pos, 1208 "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat); 1209 } 1210 1211 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); 1212 kfree(buf); 1213 1214 return ret; 1215} 1216 1217static const struct file_operations perf_dbgfs_info = { 1218 .open = simple_open, 1219 .read = perf_dbgfs_read_info 1220}; 1221 1222static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf, 1223 size_t size, loff_t *offp) 1224{ 1225 struct perf_ctx *perf = filep->private_data; 1226 ssize_t ret, pos = 0; 1227 char *buf; 1228 1229 buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL); 1230 if (!buf) 1231 return -ENOMEM; 1232 1233 ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos); 1234 if (ret) 1235 goto err_free; 1236 1237 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); 1238err_free: 1239 kfree(buf); 1240 1241 return ret; 1242} 1243 1244static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf, 1245 size_t size, loff_t *offp) 1246{ 1247 struct perf_ctx *perf = filep->private_data; 1248 struct perf_peer *peer; 1249 int pidx, ret; 1250 1251 ret = kstrtoint_from_user(ubuf, size, 0, &pidx); 1252 if (ret) 1253 return ret; 1254 1255 if (pidx < 0 || pidx >= perf->pcnt) 1256 return -EINVAL; 1257 1258 peer = &perf->peers[pidx]; 
1259 1260 ret = perf_submit_test(peer); 1261 if (ret) 1262 return ret; 1263 1264 return size; 1265} 1266 1267static const struct file_operations perf_dbgfs_run = { 1268 .open = simple_open, 1269 .read = perf_dbgfs_read_run, 1270 .write = perf_dbgfs_write_run 1271}; 1272 1273static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf, 1274 size_t size, loff_t *offp) 1275{ 1276 struct perf_ctx *perf = filep->private_data; 1277 char buf[8]; 1278 ssize_t pos; 1279 1280 pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt); 1281 1282 return simple_read_from_buffer(ubuf, size, offp, buf, pos); 1283} 1284 1285static ssize_t perf_dbgfs_write_tcnt(struct file *filep, 1286 const char __user *ubuf, 1287 size_t size, loff_t *offp) 1288{ 1289 struct perf_ctx *perf = filep->private_data; 1290 int ret; 1291 u8 val; 1292 1293 ret = kstrtou8_from_user(ubuf, size, 0, &val); 1294 if (ret) 1295 return ret; 1296 1297 ret = perf_set_tcnt(perf, val); 1298 if (ret) 1299 return ret; 1300 1301 return size; 1302} 1303 1304static const struct file_operations perf_dbgfs_tcnt = { 1305 .open = simple_open, 1306 .read = perf_dbgfs_read_tcnt, 1307 .write = perf_dbgfs_write_tcnt 1308}; 1309 1310static void perf_setup_dbgfs(struct perf_ctx *perf) 1311{ 1312 struct pci_dev *pdev = perf->ntb->pdev; 1313 1314 perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir); 1315 if (!perf->dbgfs_dir) { 1316 dev_warn(&perf->ntb->dev, "DebugFS unsupported\n"); 1317 return; 1318 } 1319 1320 debugfs_create_file("info", 0600, perf->dbgfs_dir, perf, 1321 &perf_dbgfs_info); 1322 1323 debugfs_create_file("run", 0600, perf->dbgfs_dir, perf, 1324 &perf_dbgfs_run); 1325 1326 debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf, 1327 &perf_dbgfs_tcnt); 1328 1329 /* They are made read-only for test exec safety and integrity */ 1330 debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order); 1331 1332 debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order); 
1333 1334 debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma); 1335} 1336 1337static void perf_clear_dbgfs(struct perf_ctx *perf) 1338{ 1339 debugfs_remove_recursive(perf->dbgfs_dir); 1340} 1341 1342/*============================================================================== 1343 * Basic driver initialization 1344 *============================================================================== 1345 */ 1346 1347static struct perf_ctx *perf_create_data(struct ntb_dev *ntb) 1348{ 1349 struct perf_ctx *perf; 1350 1351 perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL); 1352 if (!perf) 1353 return ERR_PTR(-ENOMEM); 1354 1355 perf->pcnt = ntb_peer_port_count(ntb); 1356 perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers), 1357 GFP_KERNEL); 1358 if (!perf->peers) 1359 return ERR_PTR(-ENOMEM); 1360 1361 perf->ntb = ntb; 1362 1363 return perf; 1364} 1365 1366static int perf_setup_peer_mw(struct perf_peer *peer) 1367{ 1368 struct perf_ctx *perf = peer->perf; 1369 phys_addr_t phys_addr; 1370 int ret; 1371 1372 /* Get outbound MW parameters and map it */ 1373 ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, 1374 &peer->outbuf_size); 1375 if (ret) 1376 return ret; 1377 1378 peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr, 1379 peer->outbuf_size); 1380 if (!peer->outbuf) 1381 return -ENOMEM; 1382 1383 if (max_mw_size && peer->outbuf_size > max_mw_size) { 1384 peer->outbuf_size = max_mw_size; 1385 dev_warn(&peer->perf->ntb->dev, 1386 "Peer %d outbuf reduced to %pa\n", peer->pidx, 1387 &peer->outbuf_size); 1388 } 1389 1390 return 0; 1391} 1392 1393static int perf_init_peers(struct perf_ctx *perf) 1394{ 1395 struct perf_peer *peer; 1396 int pidx, lport, ret; 1397 1398 lport = ntb_port_number(perf->ntb); 1399 perf->gidx = -1; 1400 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1401 peer = &perf->peers[pidx]; 1402 1403 peer->perf = perf; 1404 peer->pidx = pidx; 1405 if (lport < ntb_peer_port_number(perf->ntb, pidx)) { 1406 
if (perf->gidx == -1) 1407 perf->gidx = pidx; 1408 peer->gidx = pidx + 1; 1409 } else { 1410 peer->gidx = pidx; 1411 } 1412 INIT_WORK(&peer->service, perf_service_work); 1413 } 1414 if (perf->gidx == -1) 1415 perf->gidx = pidx; 1416 1417 for (pidx = 0; pidx < perf->pcnt; pidx++) { 1418 ret = perf_setup_peer_mw(&perf->peers[pidx]); 1419 if (ret) 1420 return ret; 1421 } 1422 1423 dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx); 1424 1425 return 0; 1426} 1427 1428static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) 1429{ 1430 struct perf_ctx *perf; 1431 int ret; 1432 1433 perf = perf_create_data(ntb); 1434 if (IS_ERR(perf)) 1435 return PTR_ERR(perf); 1436 1437 ret = perf_init_peers(perf); 1438 if (ret) 1439 return ret; 1440 1441 perf_init_threads(perf); 1442 1443 ret = perf_init_service(perf); 1444 if (ret) 1445 return ret; 1446 1447 ret = perf_enable_service(perf); 1448 if (ret) 1449 return ret; 1450 1451 perf_setup_dbgfs(perf); 1452 1453 return 0; 1454} 1455 1456static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) 1457{ 1458 struct perf_ctx *perf = ntb->ctx; 1459 1460 perf_clear_dbgfs(perf); 1461 1462 perf_disable_service(perf); 1463 1464 perf_clear_threads(perf); 1465} 1466 1467static struct ntb_client perf_client = { 1468 .ops = { 1469 .probe = perf_probe, 1470 .remove = perf_remove 1471 } 1472}; 1473 1474static int __init perf_init(void) 1475{ 1476 int ret; 1477 1478 if (chunk_order > MAX_CHUNK_ORDER) { 1479 chunk_order = MAX_CHUNK_ORDER; 1480 pr_info("Chunk order reduced to %hhu\n", chunk_order); 1481 } 1482 1483 if (total_order < chunk_order) { 1484 total_order = chunk_order; 1485 pr_info("Total data order reduced to %hhu\n", total_order); 1486 } 1487 1488 perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0); 1489 if (!perf_wq) 1490 return -ENOMEM; 1491 1492 if (debugfs_initialized()) 1493 perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); 1494 1495 ret = ntb_register_client(&perf_client); 
1496 if (ret) { 1497 debugfs_remove_recursive(perf_dbgfs_topdir); 1498 destroy_workqueue(perf_wq); 1499 } 1500 1501 return ret; 1502} 1503module_init(perf_init); 1504 1505static void __exit perf_exit(void) 1506{ 1507 ntb_unregister_client(&perf_client); 1508 debugfs_remove_recursive(perf_dbgfs_topdir); 1509 destroy_workqueue(perf_wq); 1510} 1511module_exit(perf_exit); 1512