Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/infiniband/hw/ipath/ipath_driver.c at v2.6.35 (2790 lines, 83 kB)
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/slab.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"

static void ipath_update_pio_bufs(struct ipath_devdata *);

const char *ipath_get_unit_name(int unit)
{
	static char iname[16];
	snprintf(iname, sizeof iname, "infinipath%u", unit);
	return iname;
}

#define DRIVER_LOAD_MSG	"QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "

/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the init code.
 */
const char ib_ipath_version[] = IPATH_IDSTR "\n";

static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);

wait_queue_head_t ipath_state_wait;

unsigned ipath_debug = __IPATH_INFO;

module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);

unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");

static unsigned ipath_hol_timeout_ms = 13000;
module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
MODULE_PARM_DESC(hol_timeout_ms,
	"duration of user app suspension after link failure");

unsigned ipath_linkrecovery = 1;
module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@qlogic.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");
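/*
 * Editor's note (hypothetical usage, not in the original source): these
 * module parameters would normally be given at load time, e.g.
 *
 *	modprobe ib_ipath debug=0x2 mtu4096=0
 *
 * assuming the module is built as ib_ipath.  "debug" is also writable at
 * runtime through sysfs (S_IWUSR), while "mtu4096" is read-only (S_IRUGO).
 */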
/*
 * Table to translate the LINKTRAININGSTATE portion of
 * IBCStatus to a human-readable form.
 */
const char *ipath_ibcstatus_str[] = {
	"Disabled",
	"LinkUp",
	"PollActive",
	"PollQuiet",
	"SleepDelay",
	"SleepQuiet",
	"LState6",		/* unused */
	"LState7",		/* unused */
	"CfgDebounce",
	"CfgRcvfCfg",
	"CfgWaitRmt",
	"CfgIdle",
	"RecovRetrain",
	"CfgTxRevLane",		/* unused before IBA7220 */
	"RecovWaitRmt",
	"RecovIdle",
	/* below were added for IBA7220 */
	"CfgEnhanced",
	"CfgTest",
	"CfgWaitRmtTest",
	"CfgWaitCfgEnhanced",
	"SendTS_T",
	"SendTstIdles",
	"RcvTS_T",
	"SendTst_TS1s",
	"LTState18", "LTState19", "LTState1A", "LTState1B",
	"LTState1C", "LTState1D", "LTState1E", "LTState1F"
};

static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
				    const struct pci_device_id *);

/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd

/* Number of seconds before our card status check... */
#define STATUS_TIMEOUT 60

static const struct pci_device_id ipath_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);

static struct pci_driver ipath_driver = {
	.name = IPATH_DRV_NAME,
	.probe = ipath_init_one,
	.remove = __devexit_p(ipath_remove_one),
	.id_table = ipath_pci_tbl,
	.driver = {
		.groups = ipath_driver_attr_groups,
	},
};

static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
			     u32 *bar0, u32 *bar1)
{
	int ret;

	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
	if (ret)
		ipath_dev_err(dd, "failed to read bar0 before enable: "
			      "error %d\n", -ret);

	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
	if (ret)
		ipath_dev_err(dd, "failed to read bar1 before enable: "
			      "error %d\n", -ret);

	ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
}

static void ipath_free_devdata(struct pci_dev *pdev,
			       struct ipath_devdata *dd)
{
	unsigned long flags;

	pci_set_drvdata(pdev, NULL);

	if (dd->ipath_unit != -1) {
		spin_lock_irqsave(&ipath_devs_lock, flags);
		idr_remove(&unit_table, dd->ipath_unit);
		list_del(&dd->ipath_list);
		spin_unlock_irqrestore(&ipath_devs_lock, flags);
	}
	vfree(dd);
}

static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
	unsigned long flags;
	struct ipath_devdata *dd;
	int ret;

	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd = vmalloc(sizeof(*dd));
	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}
	memset(dd, 0, sizeof(*dd));
	dd->ipath_unit = -1;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate unit ID: error %d\n", -ret);
		ipath_free_devdata(pdev, dd);
		dd = ERR_PTR(ret);
		goto bail_unlock;
	}

	dd->pcidev = pdev;
	pci_set_drvdata(pdev, dd);

	list_add(&dd->ipath_list, &ipath_dev_list);

bail_unlock:
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

bail:
	return dd;
}
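/*
 * Editor's note (descriptive, not in the original source): this is the
 * old two-step idr API: idr_pre_get() preallocates its memory with
 * GFP_KERNEL while sleeping is still allowed, so that the later
 * idr_get_new() can run safely under the ipath_devs_lock spinlock.
 */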
static inline struct ipath_devdata *__ipath_lookup(int unit)
{
	return idr_find(&unit_table, unit);
}

struct ipath_devdata *ipath_lookup(int unit)
{
	struct ipath_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&ipath_devs_lock, flags);
	dd = __ipath_lookup(unit);
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	return dd;
}

int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
{
	int nunits, npresent, nup;
	struct ipath_devdata *dd;
	unsigned long flags;
	int maxports;

	nunits = npresent = nup = maxports = 0;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
		nunits++;
		if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
			npresent++;
		if (dd->ipath_lid &&
		    !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
					 | IPATH_LINKUNK)))
			nup++;
		if (dd->ipath_cfgports > maxports)
			maxports = dd->ipath_cfgports;
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	if (npresentp)
		*npresentp = npresent;
	if (nupp)
		*nupp = nup;
	if (maxportsp)
		*maxportsp = maxports;

	return nunits;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining. If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
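/*
 * Editor's note (descriptive, not in the original source): because the
 * two stubs above are declared __attribute__((weak)), an arch-specific
 * object that defines non-weak ipath_enable_wc()/ipath_disable_wc()
 * (e.g. an x86 implementation using MTRRs) silently overrides them at
 * link time; on everything else the -EOPNOTSUPP stub is what runs.
 */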
/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).  On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void ipath_verify_pioperf(struct ipath_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = ipath_getpiobuf(dd, 0, &pbnum);
	if (!piobuf) {
		dev_info(&dd->pcidev->dev,
			"No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		dev_info(&dd->pcidev->dev,
			"Couldn't get memory for checking PIO perf,"
			" skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	ipath_disable_armlaunch(dd);

	/*
	 * length 0, no dwords actually sent, and mark as VL15
	 * on chips where that may matter (due to IB flowcontrol)
	 */
	if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
		writeq(1UL << 63, piobuf);
	else
		writeq(0, piobuf);
	ipath_flush_wc();

	/*
	 * this is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.  Running for
	 * >= 5 msec seems to get us "close enough" to accurate values
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		ipath_dev_err(dd,
			"Performance problem: bandwidth to PIO buffers is "
			"only %u MiB/sec\n",
			lcnt / (u32) emsecs);
	else
		ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
			lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	ipath_disarm_piobufs(dd, pbnum, 1);
	ipath_enable_armlaunch(dd);
}
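/*
 * Editor's note (illustrative arithmetic, not in the original source):
 * each loop iteration copies cnt = 1024 bytes = 1 KiB, so lcnt KiB move
 * in emsecs milliseconds.  lcnt / emsecs is therefore KiB/ms, which is
 * roughly MiB/s, and the "lcnt < emsecs * 1024U" test flags anything
 * under about 1 GiB/s.
 */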
static int __devinit ipath_init_one(struct pci_dev *pdev,
				    const struct pci_device_id *ent)
{
	int ret, len, j;
	struct ipath_devdata *dd;
	unsigned long long addr;
	u32 bar0 = 0, bar1 = 0;
	u8 rev;

	dd = ipath_alloc_devdata(pdev);
	if (IS_ERR(dd)) {
		ret = PTR_ERR(dd);
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate devdata: error %d\n", -ret);
		goto bail;
	}

	ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);

	ret = pci_enable_device(pdev);
	if (ret) {
		/* This can happen iff:
		 *
		 * We did a chip reset, and then failed to reprogram the
		 * BAR, or the chip reset due to an internal error.  We then
		 * unloaded the driver and reloaded it.
		 *
		 * Both reset cases set the BAR back to initial state.  For
		 * the latter case, the AER sticky error bit at offset 0x718
		 * should be set, but the Linux kernel doesn't yet know
		 * about that, it appears.  If the original BAR was retained
		 * in the kernel data structures, this may be OK.
		 */
		ipath_dev_err(dd, "enable unit %d failed: error %d\n",
			      dd->ipath_unit, -ret);
		goto bail_devdata;
	}
	addr = pci_resource_start(pdev, 0);
	len = pci_resource_len(pdev, 0);
	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
		   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
		   ent->device, ent->driver_data);

	read_bars(dd, pdev, &bar0, &bar1);

	if (!bar1 && !(bar0 & ~0xf)) {
		if (addr) {
			dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
				 "rewriting as %llx\n", addr);
			ret = pci_write_config_dword(
				pdev, PCI_BASE_ADDRESS_0, addr);
			if (ret) {
				ipath_dev_err(dd, "rewrite of BAR0 "
					      "failed: err %d\n", -ret);
				goto bail_disable;
			}
			ret = pci_write_config_dword(
				pdev, PCI_BASE_ADDRESS_1, addr >> 32);
			if (ret) {
				ipath_dev_err(dd, "rewrite of BAR1 "
					      "failed: err %d\n", -ret);
				goto bail_disable;
			}
		} else {
			ipath_dev_err(dd, "BAR is 0 (probable RESET), "
				      "not usable until reboot\n");
			ret = -ENODEV;
			goto bail_disable;
		}
	}

	ret = pci_request_regions(pdev, IPATH_DRV_NAME);
	if (ret) {
		dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
			 "err %d\n", dd->ipath_unit, -ret);
		goto bail_disable;
	}

	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (ret) {
		/*
		 * if the 64 bit setup fails, try 32 bit.  Some systems
		 * do not setup 64 bit maps on systems with 2GB or less
		 * memory installed.
		 */
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret) {
			dev_info(&pdev->dev,
				"Unable to set DMA mask for unit %u: %d\n",
				dd->ipath_unit, ret);
			goto bail_regions;
		} else {
			ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
			ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
			if (ret)
				dev_info(&pdev->dev,
					"Unable to set DMA consistent mask "
					"for unit %u: %d\n",
					dd->ipath_unit, ret);
		}
	} else {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret)
			dev_info(&pdev->dev,
				"Unable to set DMA consistent mask "
				"for unit %u: %d\n",
				dd->ipath_unit, ret);
	}

	pci_set_master(pdev);

	/*
	 * Save BARs to rewrite after device reset.  Save all 64 bits of
	 * BAR, just in case.
	 */
	dd->ipath_pcibar0 = addr;
	dd->ipath_pcibar1 = addr >> 32;
	dd->ipath_deviceid = ent->device;	/* save for later use */
	dd->ipath_vendorid = ent->vendor;
	/* setup the chip-specific functions, as early as possible. */
	switch (ent->device) {
	case PCI_DEVICE_ID_INFINIPATH_HT:
		ipath_init_iba6110_funcs(dd);
		break;

	default:
		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
			      "failing\n", ent->device);
		return -ENODEV;
	}

	for (j = 0; j < 6; j++) {
		if (!pdev->resource[j].start)
			continue;
		ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
			   j, (unsigned long long)pdev->resource[j].start,
			   (unsigned long long)pdev->resource[j].end,
			   (unsigned long long)pci_resource_len(pdev, j));
	}

	if (!addr) {
		ipath_dev_err(dd, "No valid address in BAR 0!\n");
		ret = -ENODEV;
		goto bail_regions;
	}

	ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
	if (ret) {
		ipath_dev_err(dd, "Failed to read PCI revision ID unit "
			      "%u: err %d\n", dd->ipath_unit, -ret);
		goto bail_regions;	/* shouldn't ever happen */
	}
	dd->ipath_pcirev = rev;

#if defined(__powerpc__)
	/* There isn't a generic way to specify writethrough mappings */
	dd->ipath_kregbase = __ioremap(addr, len,
		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
#else
	dd->ipath_kregbase = ioremap_nocache(addr, len);
#endif

	if (!dd->ipath_kregbase) {
		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
			  addr);
		ret = -ENOMEM;
		goto bail_iounmap;
	}
	dd->ipath_kregend = (u64 __iomem *)
		((void __iomem *)dd->ipath_kregbase + len);
	dd->ipath_physaddr = addr;	/* used for io_remap, etc. */
	/* for user mmap */
	ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
		   addr, dd->ipath_kregbase);

	if (dd->ipath_f_bus(dd, pdev))
		ipath_dev_err(dd, "Failed to setup config space; "
			      "continuing anyway\n");

	/*
	 * set up our interrupt handler; IRQF_SHARED probably not needed,
	 * since MSI interrupts shouldn't be shared but won't hurt for now.
	 * check 0 irq after we return from chip-specific bus setup, since
	 * that can affect this due to setup
	 */
	if (!dd->ipath_irq)
		ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
			      "work\n");
	else {
		ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
				  IPATH_DRV_NAME, dd);
		if (ret) {
			ipath_dev_err(dd, "Couldn't setup irq handler, "
				      "irq=%d: %d\n", dd->ipath_irq, ret);
			goto bail_iounmap;
		}
	}

	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
	if (ret)
		goto bail_irqsetup;

	ret = ipath_enable_wc(dd);

	if (ret) {
		ipath_dev_err(dd, "Write combining not enabled "
			      "(err %d): performance may be poor\n",
			      -ret);
		ret = 0;
	}

	ipath_verify_pioperf(dd);

	ipath_device_create_group(&pdev->dev, dd);
	ipathfs_add_device(dd);
	ipath_user_add(dd);
	ipath_diag_add(dd);
	ipath_register_ib_device(dd);

	goto bail;

bail_irqsetup:
	if (pdev->irq)
		free_irq(pdev->irq, dd);

bail_iounmap:
	iounmap((volatile void __iomem *) dd->ipath_kregbase);

bail_regions:
	pci_release_regions(pdev);

bail_disable:
	pci_disable_device(pdev);

bail_devdata:
	ipath_free_devdata(pdev, dd);

bail:
	return ret;
}
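/*
 * Editor's note (descriptive, not in the original source): ipath_init_one()
 * uses the conventional kernel goto-unwind pattern: each bail_* label
 * releases everything acquired before the failing step, in reverse order
 * of acquisition, so every error path exits with a consistent state.
 */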
static void __devexit cleanup_device(struct ipath_devdata *dd)
{
	int port;
	struct ipath_portdata **tmp;
	unsigned long flags;

	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
		/* can't do anything more with chip; needs re-init */
		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
		if (dd->ipath_kregbase) {
			/*
			 * if we haven't already cleaned up before, these
			 * are set to ensure any register reads/writes
			 * "fail" until re-init
			 */
			dd->ipath_kregbase = NULL;
			dd->ipath_uregbase = 0;
			dd->ipath_sregbase = 0;
			dd->ipath_cregbase = 0;
			dd->ipath_kregsize = 0;
		}
		ipath_disable_wc(dd);
	}

	if (dd->ipath_spectriggerhit)
		dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
			 dd->ipath_spectriggerhit);

	if (dd->ipath_pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->ipath_pioavailregs_dma,
				  dd->ipath_pioavailregs_phys);
		dd->ipath_pioavailregs_dma = NULL;
	}
	if (dd->ipath_dummy_hdrq) {
		dma_free_coherent(&dd->pcidev->dev,
			dd->ipath_pd[0]->port_rcvhdrq_size,
			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
		dd->ipath_dummy_hdrq = NULL;
	}

	if (dd->ipath_pageshadow) {
		struct page **tmpp = dd->ipath_pageshadow;
		dma_addr_t *tmpd = dd->ipath_physshadow;
		int i, cnt = 0;

		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
			   "locked\n");
		for (port = 0; port < dd->ipath_cfgports; port++) {
			int port_tidbase = port * dd->ipath_rcvtidcnt;
			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
			for (i = port_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					PAGE_SIZE, PCI_DMA_FROMDEVICE);
				ipath_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}
		if (cnt) {
			ipath_stats.sps_pageunlocks += cnt;
			ipath_cdbg(VERBOSE, "There were still %u expTID "
				   "entries locked\n", cnt);
		}
		if (ipath_stats.sps_pagelocks ||
		    ipath_stats.sps_pageunlocks)
			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
				   "unlocked via ipath_m{un}lock\n",
				   (unsigned long long)
				   ipath_stats.sps_pagelocks,
				   (unsigned long long)
				   ipath_stats.sps_pageunlocks);

		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
			   dd->ipath_pageshadow);
		tmpp = dd->ipath_pageshadow;
		dd->ipath_pageshadow = NULL;
		vfree(tmpp);

		dd->ipath_egrtidbase = NULL;
	}

	/*
	 * free any resources still in use (usually just kernel ports)
	 * at unload; we do for portcnt, because that's what we allocate.
	 * We acquire lock to be really paranoid that ipath_pd isn't being
	 * accessed from some interrupt-related code (that should not happen,
	 * but best to be sure).
	 */
	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	tmp = dd->ipath_pd;
	dd->ipath_pd = NULL;
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
	for (port = 0; port < dd->ipath_portcnt; port++) {
		struct ipath_portdata *pd = tmp[port];
		tmp[port] = NULL;	/* debugging paranoia */
		ipath_free_pddata(dd, pd);
	}
	kfree(tmp);
}

static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
	struct ipath_devdata *dd = pci_get_drvdata(pdev);

	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);

	/*
	 * disable the IB link early, to be sure no new packets arrive, which
	 * complicates the shutdown process
	 */
	ipath_shutdown_device(dd);

	flush_scheduled_work();

	if (dd->verbs_dev)
		ipath_unregister_ib_device(dd->verbs_dev);

	ipath_diag_remove(dd);
	ipath_user_remove(dd);
	ipathfs_remove_device(dd);
	ipath_device_remove_group(&pdev->dev, dd);

	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
		   "unit %u\n", dd, (u32) dd->ipath_unit);

	cleanup_device(dd);

	/*
	 * turn off rcv, send, and interrupts for all ports, all drivers
	 * should also hard reset the chip here?
	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
	 * for all versions of the driver, if they were allocated
	 */
	if (dd->ipath_irq) {
		ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
			   dd->ipath_unit, dd->ipath_irq);
		dd->ipath_f_free_irq(dd);
	} else
		ipath_dbg("irq is 0, not doing free_irq "
			  "for unit %u\n", dd->ipath_unit);
	/*
	 * we check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.  Thus it's possible that
	 * the function pointers were never initialized.
	 */
	if (dd->ipath_f_cleanup)
		/* clean up chip-specific stuff */
		dd->ipath_f_cleanup(dd);

	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
	iounmap((volatile void __iomem *) dd->ipath_kregbase);
	pci_release_regions(pdev);
	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
	pci_disable_device(pdev);

	ipath_free_devdata(pdev, dd);
}
/* general driver use */
DEFINE_MUTEX(ipath_mutex);

static DEFINE_SPINLOCK(ipath_pioavail_lock);

/**
 * ipath_disarm_piobufs - cancel a range of PIO buffers
 * @dd: the infinipath device
 * @first: the first PIO buffer to cancel
 * @cnt: the number of PIO buffers to cancel
 *
 * cancel a range of PIO buffers, used when they might be armed, but
 * not triggered.  Used at init to ensure buffer state, and also at user
 * process close, in case it died while writing to a PIO buffer.
 * Also after errors.
 */
void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
			  unsigned cnt)
{
	unsigned i, last = first + cnt;
	unsigned long flags;

	ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
	for (i = first; i < last; i++) {
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		/*
		 * The disarm-related bits are write-only, so it
		 * is ok to OR them in with our copy of sendctrl
		 * while we hold the lock.
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl | INFINIPATH_S_DISARM |
			(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
		/* can't disarm bufs back-to-back per iba7220 spec */
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}
	/* on some older chips, update may not happen after cancel */
	ipath_force_pio_avail_update(dd);
}

/**
 * ipath_wait_linkstate - wait for an IB link state change to occur
 * @dd: the infinipath device
 * @state: the state to wait for
 * @msecs: the number of milliseconds to wait
 *
 * wait up to msecs milliseconds for IB link state change to occur;
 * for now, take the easy polling route.  Currently used only by
 * ipath_set_linkstate.  Returns 0 if state reached, otherwise
 * -ETIMEDOUT.  state can have multiple states set, for any of several
 * transitions.
 */
int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
{
	dd->ipath_state_wanted = state;
	wait_event_interruptible_timeout(ipath_state_wait,
					 (dd->ipath_flags & state),
					 msecs_to_jiffies(msecs));
	dd->ipath_state_wanted = 0;

	if (!(dd->ipath_flags & state)) {
		u64 val;
		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
			   " ms\n",
			   /* test INIT ahead of DOWN, both can be set */
			   (state & IPATH_LINKINIT) ? "INIT" :
			   ((state & IPATH_LINKDOWN) ? "DOWN" :
			    ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
			   msecs);
		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
		ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
			   (unsigned long long) ipath_read_kreg64(
				   dd, dd->ipath_kregs->kr_ibcctrl),
			   (unsigned long long) val,
			   ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
	}
	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}
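/*
 * Editor's note (hypothetical usage, not in the original source): a
 * caller bringing the link to ARMED might do something like
 *
 *	if (ipath_wait_linkstate(dd, IPATH_LINKARMED, 500))
 *		;	// handle -ETIMEDOUT
 *
 * i.e. poll up to 500 ms for any of the requested state bits to appear
 * in dd->ipath_flags.
 */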
static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
			     char *buf, size_t blen)
{
	static const struct {
		ipath_err_t err;
		const char *msg;
	} errs[] = {
		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
	};
	int i;
	int expected;
	size_t bidx = 0;

	for (i = 0; i < ARRAY_SIZE(errs); i++) {
		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
		if ((err & errs[i].err) && !expected)
			bidx += snprintf(buf + bidx, blen - bidx,
					 "%s ", errs[i].msg);
	}
}

/*
 * Decode the error status into strings, deciding whether to always
 * print it or not depending on "normal packet errors" vs everything
 * else.  Return 1 if "real" errors, otherwise 0 if only packet
 * errors, so caller can decide what to print with the string.
 */
int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
	ipath_err_t err)
{
	int iserr = 1;
	*buf = '\0';
	if (err & INFINIPATH_E_PKTERRS) {
		if (!(err & ~INFINIPATH_E_PKTERRS))
			iserr = 0; // if only packet errors.
		if (ipath_debug & __IPATH_ERRPKTDBG) {
			if (err & INFINIPATH_E_REBP)
				strlcat(buf, "EBP ", blen);
			if (err & INFINIPATH_E_RVCRC)
				strlcat(buf, "VCRC ", blen);
			if (err & INFINIPATH_E_RICRC) {
				strlcat(buf, "CRC ", blen);
				// clear for check below, so only once
				err &= INFINIPATH_E_RICRC;
			}
			if (err & INFINIPATH_E_RSHORTPKTLEN)
				strlcat(buf, "rshortpktlen ", blen);
			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
				strlcat(buf, "sdroppeddatapkt ", blen);
			if (err & INFINIPATH_E_SPKTLEN)
				strlcat(buf, "spktlen ", blen);
		}
		if ((err & INFINIPATH_E_RICRC) &&
			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
			strlcat(buf, "CRC ", blen);
		if (!iserr)
			goto done;
	}
	if (err & INFINIPATH_E_RHDRLEN)
		strlcat(buf, "rhdrlen ", blen);
	if (err & INFINIPATH_E_RBADTID)
		strlcat(buf, "rbadtid ", blen);
	if (err & INFINIPATH_E_RBADVERSION)
		strlcat(buf, "rbadversion ", blen);
	if (err & INFINIPATH_E_RHDR)
		strlcat(buf, "rhdr ", blen);
	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
		strlcat(buf, "sendspecialtrigger ", blen);
	if (err & INFINIPATH_E_RLONGPKTLEN)
		strlcat(buf, "rlongpktlen ", blen);
	if (err & INFINIPATH_E_RMAXPKTLEN)
		strlcat(buf, "rmaxpktlen ", blen);
	if (err & INFINIPATH_E_RMINPKTLEN)
		strlcat(buf, "rminpktlen ", blen);
	if (err & INFINIPATH_E_SMINPKTLEN)
		strlcat(buf, "sminpktlen ", blen);
	if (err & INFINIPATH_E_RFORMATERR)
		strlcat(buf, "rformaterr ", blen);
	if (err & INFINIPATH_E_RUNSUPVL)
		strlcat(buf, "runsupvl ", blen);
	if (err & INFINIPATH_E_RUNEXPCHAR)
		strlcat(buf, "runexpchar ", blen);
	if (err & INFINIPATH_E_RIBFLOW)
		strlcat(buf, "ribflow ", blen);
	if (err & INFINIPATH_E_SUNDERRUN)
		strlcat(buf, "sunderrun ", blen);
	if (err & INFINIPATH_E_SPIOARMLAUNCH)
		strlcat(buf, "spioarmlaunch ", blen);
	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
		strlcat(buf, "sunexperrpktnum ", blen);
	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
		strlcat(buf, "sdroppedsmppkt ", blen);
	if (err & INFINIPATH_E_SMAXPKTLEN)
		strlcat(buf, "smaxpktlen ", blen);
	if (err & INFINIPATH_E_SUNSUPVL)
		strlcat(buf, "sunsupVL ", blen);
	if (err & INFINIPATH_E_INVALIDADDR)
		strlcat(buf, "invalidaddr ", blen);
	if (err & INFINIPATH_E_RRCVEGRFULL)
		strlcat(buf, "rcvegrfull ", blen);
	if (err & INFINIPATH_E_RRCVHDRFULL)
		strlcat(buf, "rcvhdrfull ", blen);
	if (err & INFINIPATH_E_IBSTATUSCHANGED)
		strlcat(buf, "ibcstatuschg ", blen);
	if (err & INFINIPATH_E_RIBLOSTLINK)
		strlcat(buf, "riblostlink ", blen);
	if (err & INFINIPATH_E_HARDWARE)
		strlcat(buf, "hardware ", blen);
	if (err & INFINIPATH_E_RESET)
		strlcat(buf, "reset ", blen);
	if (err & INFINIPATH_E_SDMAERRS)
		decode_sdma_errs(dd, err, buf, blen);
	if (err & INFINIPATH_E_INVALIDEEPCMD)
		strlcat(buf, "invalideepromcmd ", blen);
done:
	return iserr;
}
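/*
 * Editor's note (illustrative, not in the original source): for an err
 * value with, say, INFINIPATH_E_HARDWARE and INFINIPATH_E_RESET set
 * (clearly not packet errors), buf comes back as "hardware reset " and
 * the function returns 1; for pure packet errors it returns 0 so the
 * caller can log less urgently.
 */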
/**
 * get_rhf_errstring - decode RHF errors
 * @err: the err number
 * @msg: the output buffer
 * @len: the length of the output buffer
 *
 * only used one place now, may want more later
 */
static void get_rhf_errstring(u32 err, char *msg, size_t len)
{
	/* if no errors, and so don't need to check what's first */
	*msg = '\0';

	if (err & INFINIPATH_RHF_H_ICRCERR)
		strlcat(msg, "icrcerr ", len);
	if (err & INFINIPATH_RHF_H_VCRCERR)
		strlcat(msg, "vcrcerr ", len);
	if (err & INFINIPATH_RHF_H_PARITYERR)
		strlcat(msg, "parityerr ", len);
	if (err & INFINIPATH_RHF_H_LENERR)
		strlcat(msg, "lenerr ", len);
	if (err & INFINIPATH_RHF_H_MTUERR)
		strlcat(msg, "mtuerr ", len);
	if (err & INFINIPATH_RHF_H_IHDRERR)
		/* infinipath hdr checksum error */
		strlcat(msg, "ipathhdrerr ", len);
	if (err & INFINIPATH_RHF_H_TIDERR)
		strlcat(msg, "tiderr ", len);
	if (err & INFINIPATH_RHF_H_MKERR)
		/* bad port, offset, etc. */
		strlcat(msg, "invalid ipathhdr ", len);
	if (err & INFINIPATH_RHF_H_IBERR)
		strlcat(msg, "iberr ", len);
	if (err & INFINIPATH_RHF_L_SWA)
		strlcat(msg, "swA ", len);
	if (err & INFINIPATH_RHF_L_SWB)
		strlcat(msg, "swB ", len);
}

/**
 * ipath_get_egrbuf - get an eager buffer
 * @dd: the infinipath device
 * @bufnum: the eager buffer to get
 *
 * must only be called if ipath_pd[port] is known to be allocated
 */
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
	return dd->ipath_port0_skbinfo ?
		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}

/**
 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
 * @dd: the infinipath device
 * @gfp_mask: the sk_buff GFP mask
 */
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
				gfp_t gfp_mask)
{
	struct sk_buff *skb;
	u32 len;

	/*
	 * Only fully supported way to handle this is to allocate lots
	 * extra, align as needed, and then do skb_reserve().  That wastes
	 * a lot of memory...  I'll have to hack this into infinipath_copy
	 * also.
	 */

	/*
	 * We need 2 extra bytes for ipath_ether data sent in the
	 * key header.  In order to keep everything dword aligned,
	 * we'll reserve 4 bytes.
	 */
	len = dd->ipath_ibmaxlen + 4;

	if (dd->ipath_flags & IPATH_4BYTE_TID) {
		/* We need a 2KB multiple alignment, and there is no way
		 * to do it except to allocate extra and then skb_reserve
		 * enough to bring it up to the right alignment.
		 */
		len += 2047;
	}

	skb = __dev_alloc_skb(len, gfp_mask);
	if (!skb) {
		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
			      len);
		goto bail;
	}

	skb_reserve(skb, 4);

	if (dd->ipath_flags & IPATH_4BYTE_TID) {
		u32 una = (unsigned long)skb->data & 2047;
		if (una)
			skb_reserve(skb, 2048 - una);
	}

bail:
	return skb;
}
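/*
 * Editor's note (illustrative arithmetic, not in the original source):
 * with IPATH_4BYTE_TID, "una = (unsigned long)skb->data & 2047" is the
 * buffer's offset within a 2 KiB block.  If skb->data were 0x...0804,
 * una = 4 and skb_reserve(skb, 2044) advances data to the next 2 KiB
 * boundary at 0x...1000; the extra 2047 bytes allocated above guarantee
 * the reserve always fits.
 */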
static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
			     u32 eflags,
			     u32 l,
			     u32 etail,
			     __le32 *rhf_addr,
			     struct ipath_message_header *hdr)
{
	char emsg[128];

	get_rhf_errstring(eflags, emsg, sizeof emsg);
	ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
		   "tlen=%x opcode=%x egridx=%x: %s\n",
		   eflags, l,
		   ipath_hdrget_rcv_type(rhf_addr),
		   ipath_hdrget_length_in_bytes(rhf_addr),
		   be32_to_cpu(hdr->bth[0]) >> 24,
		   etail, emsg);

	/* Count local link integrity errors. */
	if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
		u8 n = (dd->ipath_ibcctrl >>
			INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
			INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;

		if (++dd->ipath_lli_counter > n) {
			dd->ipath_lli_counter = 0;
			dd->ipath_lli_errors++;
		}
	}
}

/*
 * ipath_kreceive - receive a packet
 * @pd: the infinipath port
 *
 * called from interrupt handler for errors or receive interrupt
 */
void ipath_kreceive(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	__le32 *rhf_addr;
	void *ebuf;
	const u32 rsize = dd->ipath_rcvhdrentsize;	/* words */
	const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize;	/* words */
	u32 etail = -1, l, hdrqtail;
	struct ipath_message_header *hdr;
	u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
	static u64 totcalls;	/* stats, may eventually remove */
	int last;

	l = pd->port_head;
	rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
		u32 seq = ipath_hdrget_seq(rhf_addr);

		if (seq != pd->port_seq_cnt)
			goto bail;
		hdrqtail = 0;
	} else {
		hdrqtail = ipath_get_rcvhdrtail(pd);
		if (l == hdrqtail)
			goto bail;
		smp_rmb();
	}

reloop:
	for (last = 0, i = 1; !last; i += !last) {
		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
		eflags = ipath_hdrget_err_flags(rhf_addr);
		etype = ipath_hdrget_rcv_type(rhf_addr);
		/* total length */
		tlen = ipath_hdrget_length_in_bytes(rhf_addr);
		ebuf = NULL;
		if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
		    ipath_hdrget_use_egr_buf(rhf_addr) :
		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
			/*
			 * It turns out that the chip uses an eager buffer
			 * for all non-expected packets, whether it "needs"
			 * one or not.  So always get the index, but don't
			 * set ebuf (so we try to copy data) unless the
			 * length requires it.
			 */
			etail = ipath_hdrget_index(rhf_addr);
			updegr = 1;
			if (tlen > sizeof(*hdr) ||
			    etype == RCVHQ_RCV_TYPE_NON_KD)
				ebuf = ipath_get_egrbuf(dd, etail);
		}

		/*
		 * both tiderr and ipathhdrerr are set for all plain IB
		 * packets; only ipathhdrerr should be set.
		 */

		if (etype != RCVHQ_RCV_TYPE_NON_KD &&
		    etype != RCVHQ_RCV_TYPE_ERROR &&
		    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
		    IPS_PROTO_VERSION)
			ipath_cdbg(PKT, "Bad InfiniPath protocol version "
				   "%x\n", etype);

		if (unlikely(eflags))
			ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
			ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
			if (dd->ipath_lli_counter)
				dd->ipath_lli_counter--;
		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
			u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
			u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
				   "qp=%x), len %x; ignored\n",
				   etype, opcode, qp, tlen);
		}
		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
				  be32_to_cpu(hdr->bth[0]) >> 24);
		else {
			/*
			 * error packet, type of error unknown.
			 * Probably type 3, but we don't know, so don't
			 * even try to print the opcode, etc.
			 * Usually caused by a "bad packet", that has no
			 * BTH, when the LRH says it should.
			 */
			ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
				   " %x, len %x hdrq+%x rhf: %Lx\n",
				   etail, tlen, l, (unsigned long long)
				   le64_to_cpu(*(__le64 *) rhf_addr));
			if (ipath_debug & __IPATH_ERRPKTDBG) {
				u32 j, *d, dw = rsize-2;
				if (rsize > (tlen>>2))
					dw = tlen>>2;
				d = (u32 *)hdr;
				printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
					dw);
				for (j = 0; j < dw; j++)
					printk(KERN_DEBUG "%8x%s", d[j],
						(j%8) == 7 ? "\n" : " ");
				printk(KERN_DEBUG ".\n");
			}
		}
		l += rsize;
		if (l >= maxcnt)
			l = 0;
		rhf_addr = (__le32 *) pd->port_rcvhdrq +
			l + dd->ipath_rhf_offset;
		if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
			u32 seq = ipath_hdrget_seq(rhf_addr);

			if (++pd->port_seq_cnt > 13)
				pd->port_seq_cnt = 1;
			if (seq != pd->port_seq_cnt)
				last = 1;
		} else if (l == hdrqtail)
			last = 1;
		/*
		 * update head regs on last packet, and every 16 packets.
		 * Reduce bus traffic, while still trying to prevent
		 * rcvhdrq overflows, for when the queue is nearly full
		 */
		if (last || !(i & 0xf)) {
			u64 lval = l;

			/* request IBA6120 and 7220 interrupt only on last */
			if (last)
				lval |= dd->ipath_rhdrhead_intr_off;
			ipath_write_ureg(dd, ur_rcvhdrhead, lval,
					 pd->port_port);
			if (updegr) {
				ipath_write_ureg(dd, ur_rcvegrindexhead,
						 etail, pd->port_port);
				updegr = 0;
			}
		}
	}

	if (!dd->ipath_rhdrhead_intr_off && !reloop &&
	    !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
		/* IBA6110 workaround; we can have a race clearing chip
		 * interrupt with another interrupt about to be delivered,
		 * and can clear it before it is delivered on the GPIO
		 * workaround.  By doing the extra check here for the
		 * in-memory tail register updating while we were doing
		 * earlier packets, we "almost" guarantee we have covered
		 * that case.
		 */
		u32 hqtail = ipath_get_rcvhdrtail(pd);
		if (hqtail != hdrqtail) {
			hdrqtail = hqtail;
			reloop = 1; /* loop 1 extra time at most */
			goto reloop;
		}
	}

	pkttot += i;

	pd->port_head = l;

	if (pkttot > ipath_stats.sps_maxpkts_call)
		ipath_stats.sps_maxpkts_call = pkttot;
	ipath_stats.sps_port0pkts += pkttot;
	ipath_stats.sps_avgpkts_call =
		ipath_stats.sps_port0pkts / ++totcalls;

bail:;
}
/**
 * ipath_update_pio_bufs - update shadow copy of the PIO availability map
 * @dd: the infinipath device
 *
 * called whenever our local copy indicates we have run out of send buffers
 * NOTE: This can be called from interrupt context by some code
 * and from non-interrupt context by ipath_getpiobuf().
 */
static void ipath_update_pio_bufs(struct ipath_devdata *dd)
{
	unsigned long flags;
	int i;
	const unsigned piobregs = (unsigned)dd->ipath_pioavregs;

	/* If the generation (check) bits have changed, then we update the
	 * busy bit for the corresponding PIO buffer.  This algorithm will
	 * modify positions to the value they already have in some cases
	 * (i.e., no change), but it's faster than changing only the bits
	 * that have changed.
	 *
	 * We would like to do this atomically, to avoid spinlocks in the
	 * critical send path, but that's not really possible, given the
	 * type of changes, and that this routine could be called on
	 * multiple cpu's simultaneously, so we lock in this routine only,
	 * to avoid conflicting updates; all we change is the shadow, and
	 * it's a single 64 bit memory location, so by definition the update
	 * is atomic in terms of what other cpu's can see in testing the
	 * bits.  The spin_lock overhead isn't too bad, since it only
	 * happens when all buffers are in use, so only cpu overhead, not
	 * latency or bandwidth is affected.
	 */
	if (!dd->ipath_pioavailregs_dma) {
		ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
		return;
	}
	if (ipath_debug & __IPATH_VERBDBG) {
		/* only if packet debug and verbose */
		volatile __le64 *dma = dd->ipath_pioavailregs_dma;
		unsigned long *shadow = dd->ipath_pioavailshadow;

		ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
			   "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
			   "s3=%lx\n",
			   (unsigned long long) le64_to_cpu(dma[0]),
			   shadow[0],
			   (unsigned long long) le64_to_cpu(dma[1]),
			   shadow[1],
			   (unsigned long long) le64_to_cpu(dma[2]),
			   shadow[2],
			   (unsigned long long) le64_to_cpu(dma[3]),
			   shadow[3]);
		if (piobregs > 4)
			ipath_cdbg(
				PKT, "2nd group, dma4=%llx shad4=%lx, "
				"d5=%llx s5=%lx, d6=%llx s6=%lx, "
				"d7=%llx s7=%lx\n",
				(unsigned long long) le64_to_cpu(dma[4]),
				shadow[4],
				(unsigned long long) le64_to_cpu(dma[5]),
				shadow[5],
				(unsigned long long) le64_to_cpu(dma[6]),
				shadow[6],
				(unsigned long long) le64_to_cpu(dma[7]),
				shadow[7]);
	}
	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (i = 0; i < piobregs; i++) {
		u64 pchbusy, pchg, piov, pnew;
		/*
		 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
		 */
		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
		else
			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
		pchg = dd->ipath_pioavailkernel[i] &
			~(dd->ipath_pioavailshadow[i] ^ piov);
		pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
		if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
			pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
			pnew |= piov & pchbusy;
			dd->ipath_pioavailshadow[i] = pnew;
		}
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
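/*
 * Editor's note (illustrative, not in the original source): each send
 * buffer owns two adjacent bits in these qwords, a generation ("check")
 * bit at position 2*n and a busy bit at position 2*n + 1 (hence
 * INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT and the 0xaaaa... all-busy-bits
 * mask used below), so one 64-bit register tracks 32 buffers, matching
 * the "4 buffers per byte" comment in no_pio_bufs().
 */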
/*
 * used to force update of pioavailshadow if we can't get a pio buffer.
 * Needed primarily due to exiting freeze mode after recovering
 * from errors.  Done lazily, because it's safer (known to not
 * be writing pio buffers).
 */
static void ipath_reset_availshadow(struct ipath_devdata *dd)
{
	int i, im;
	unsigned long flags;

	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (i = 0; i < dd->ipath_pioavregs; i++) {
		u64 val, oldval;
		/* deal with 6110 chip bug on high register #s */
		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
			i ^ 1 : i;
		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
		/*
		 * busy out the buffers not in the kernel avail list,
		 * without changing the generation bits.
		 */
		oldval = dd->ipath_pioavailshadow[i];
		dd->ipath_pioavailshadow[i] = val |
			((~dd->ipath_pioavailkernel[i] <<
			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
		if (oldval != dd->ipath_pioavailshadow[i])
			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
				i, (unsigned long long) oldval,
				dd->ipath_pioavailshadow[i]);
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}

/**
 * ipath_setrcvhdrsize - set the receive header size
 * @dd: the infinipath device
 * @rhdrsize: the receive header size
 *
 * called from user init code, and also layered driver init
 */
int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
{
	int ret = 0;

	if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
		if (dd->ipath_rcvhdrsize != rhdrsize) {
			dev_info(&dd->pcidev->dev,
				 "Error: can't set protocol header "
				 "size %u, already %u\n",
				 rhdrsize, dd->ipath_rcvhdrsize);
			ret = -EAGAIN;
		} else
			ipath_cdbg(VERBOSE, "Reuse same protocol header "
				   "size %u\n", dd->ipath_rcvhdrsize);
	} else if (rhdrsize > (dd->ipath_rcvhdrentsize -
			       (sizeof(u64) / sizeof(u32)))) {
		ipath_dbg("Error: can't set protocol header size %u "
			  "(> max %u)\n", rhdrsize,
			  dd->ipath_rcvhdrentsize -
			  (u32) (sizeof(u64) / sizeof(u32)));
		ret = -EOVERFLOW;
	} else {
		dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
		dd->ipath_rcvhdrsize = rhdrsize;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
				 dd->ipath_rcvhdrsize);
		ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
			   dd->ipath_rcvhdrsize);
	}
	return ret;
}
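/*
 * Editor's note (illustrative arithmetic, not in the original source):
 * sizeof(u64) / sizeof(u32) == 2, so the maximum accepted rhdrsize is
 * ipath_rcvhdrentsize - 2 words; each header-queue entry apparently
 * reserves one 64-bit qword (the receive header flags word used by
 * ipath_kreceive) alongside the protocol header itself.
 */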
/*
 * debugging code and stats updates if no pio buffers available.
 */
static noinline void no_pio_bufs(struct ipath_devdata *dd)
{
	unsigned long *shadow = dd->ipath_pioavailshadow;
	__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;

	dd->ipath_upd_pio_shadow = 1;

	/*
	 * not atomic, but if we lose a stat count in a while, that's OK
	 */
	ipath_stats.sps_nopiobufs++;
	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
		ipath_force_pio_avail_update(dd); /* at start */
		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
			"%llx %llx %llx %llx\n"
			"ipath shadow: %lx %lx %lx %lx\n",
			dd->ipath_consec_nopiobuf,
			(unsigned long)get_cycles(),
			(unsigned long long) le64_to_cpu(dma[0]),
			(unsigned long long) le64_to_cpu(dma[1]),
			(unsigned long long) le64_to_cpu(dma[2]),
			(unsigned long long) le64_to_cpu(dma[3]),
			shadow[0], shadow[1], shadow[2], shadow[3]);
		/*
		 * 4 buffers per byte, 4 registers above, cover rest
		 * below
		 */
		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
		    (sizeof(shadow[0]) * 4 * 4))
			ipath_dbg("2nd group: dmacopy: "
				"%llx %llx %llx %llx\n"
				"ipath shadow: %lx %lx %lx %lx\n",
				(unsigned long long)le64_to_cpu(dma[4]),
				(unsigned long long)le64_to_cpu(dma[5]),
				(unsigned long long)le64_to_cpu(dma[6]),
				(unsigned long long)le64_to_cpu(dma[7]),
				shadow[4], shadow[5], shadow[6], shadow[7]);

		/* at end, so update likely happened */
		ipath_reset_availshadow(dd);
	}
}
/*
 * common code for normal driver pio buffer allocation, and reserved
 * allocation.
 *
 * do appropriate marking as busy, etc.
 * returns buffer number if one found (>=0), negative number is error.
 */
static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
	u32 *pbufnum, u32 first, u32 last, u32 firsti)
{
	int i, j, updated = 0;
	unsigned piobcnt;
	unsigned long flags;
	unsigned long *shadow = dd->ipath_pioavailshadow;
	u32 __iomem *buf;

	piobcnt = last - first;
	if (dd->ipath_upd_pio_shadow) {
		/*
		 * Minor optimization.  If we had no buffers on last call,
		 * start out by doing the update; continue and do scan even
		 * if no buffers were updated, to be paranoid
		 */
		ipath_update_pio_bufs(dd);
		updated++;
		i = first;
	} else
		i = firsti;
rescan:
	/*
	 * while test_and_set_bit() is atomic, we do that and then the
	 * change_bit(), and the pair is not.  See if this is the cause
	 * of the remaining armlaunch errors.
	 */
	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (j = 0; j < piobcnt; j++, i++) {
		if (i >= last)
			i = first;
		if (__test_and_set_bit((2 * i) + 1, shadow))
			continue;
		/* flip generation bit */
		__change_bit(2 * i, shadow);
		break;
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);

	if (j == piobcnt) {
		if (!updated) {
			/*
			 * first time through; shadow exhausted, but may be
			 * buffers available, try an update and then rescan.
			 */
			ipath_update_pio_bufs(dd);
			updated++;
			i = first;
			goto rescan;
		} else if (updated == 1 && piobcnt <=
			((dd->ipath_sendctrl
			>> INFINIPATH_S_UPDTHRESH_SHIFT) &
			INFINIPATH_S_UPDTHRESH_MASK)) {
			/*
			 * for chips supporting and using the update
			 * threshold we need to force an update of the
			 * in-memory copy if the count is less than the
			 * threshold, then check one more time.
			 */
			ipath_force_pio_avail_update(dd);
			ipath_update_pio_bufs(dd);
			updated++;
			i = first;
			goto rescan;
		}

		no_pio_bufs(dd);
		buf = NULL;
	} else {
		if (i < dd->ipath_piobcnt2k)
			buf = (u32 __iomem *) (dd->ipath_pio2kbase +
					       i * dd->ipath_palign);
		else
			buf = (u32 __iomem *)
				(dd->ipath_pio4kbase +
				 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
		if (pbufnum)
			*pbufnum = i;
	}

	return buf;
}

/**
 * ipath_getpiobuf - find an available pio buffer
 * @dd: the infinipath device
 * @plen: the size of the PIO buffer needed in 32-bit words
 * @pbufnum: the buffer number is placed here
 */
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
{
	u32 __iomem *buf;
	u32 pnum, nbufs;
	u32 first, lasti;

	if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
		first = dd->ipath_piobcnt2k;
		lasti = dd->ipath_lastpioindexl;
	} else {
		first = 0;
		lasti = dd->ipath_lastpioindex;
	}
	nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
	buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);

	if (buf) {
		/*
		 * Set next starting place.  It's just an optimization,
		 * it doesn't matter who wins on this, so no locking
		 */
		if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
			dd->ipath_lastpioindexl = pnum + 1;
		else
			dd->ipath_lastpioindex = pnum + 1;
		if (dd->ipath_upd_pio_shadow)
			dd->ipath_upd_pio_shadow = 0;
		if (dd->ipath_consec_nopiobuf)
			dd->ipath_consec_nopiobuf = 0;
		ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
			   pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
		if (pbufnum)
			*pbufnum = pnum;
	}
	return buf;
}
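/*
 * Editor's note (hypothetical usage, not in the original source): a
 * sender needing a large buffer for, say, 1024 dwords of payload would
 * call
 *
 *	u32 pnum;
 *	u32 __iomem *piobuf = ipath_getpiobuf(dd, 1024, &pnum);
 *
 * which scans the 4k-buffer pool since plen + 1 >= IPATH_SMALLBUF_DWORDS;
 * ipath_verify_pioperf() above shows the same call with plen 0 to grab a
 * small buffer.
 */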
/**
 * ipath_chg_pioavailkernel - change which send buffers are available for kernel
 * @dd: the infinipath device
 * @start: the starting send buffer number
 * @len: the number of send buffers
 * @avail: true if the buffers are available for kernel use, false otherwise
 */
void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
			      unsigned len, int avail)
{
	unsigned long flags;
	unsigned end, cnt = 0;

	/* There are two bits per send buffer (busy and generation) */
	start *= 2;
	end = start + len * 2;

	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	/* Set or clear the busy bit in the shadow. */
	while (start < end) {
		if (avail) {
			unsigned long dma;
			int i, im;
			/*
			 * the BUSY bit will never be set, because we disarm
			 * the user buffers before we hand them back to the
			 * kernel.  We do have to make sure the generation
			 * bit is set correctly in shadow, since it could
			 * have changed many times while allocated to user.
			 * We can't use the bitmap functions on the full
			 * dma array because it is always little-endian, so
			 * we have to flip to host-order first.
			 * BITS_PER_LONG is slightly wrong, since it's
			 * always 64 bits per register in chip...
			 * We only work on 64 bit kernels, so that's OK.
			 */
			/* deal with 6110 chip bug on high register #s */
			i = start / BITS_PER_LONG;
			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
				i ^ 1 : i;
			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
				+ start, dd->ipath_pioavailshadow);
			dma = (unsigned long) le64_to_cpu(
				dd->ipath_pioavailregs_dma[im]);
			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
				+ start) % BITS_PER_LONG, &dma))
				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
					+ start, dd->ipath_pioavailshadow);
			else
				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
					+ start, dd->ipath_pioavailshadow);
			__set_bit(start, dd->ipath_pioavailkernel);
		} else {
			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
				dd->ipath_pioavailshadow);
			__clear_bit(start, dd->ipath_pioavailkernel);
		}
		start += 2;
	}

	if (dd->ipath_pioupd_thresh) {
		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
		cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);

	/*
	 * When moving buffers from kernel to user, if number assigned to
	 * the user is less than the pio update threshold, and threshold
	 * is supported (cnt was computed > 0), drop the update threshold
	 * so we update at least once per allocated number of buffers.
	 * In any case, if the kernel buffers are less than the threshold,
	 * drop the threshold.  We don't bother increasing it, having once
	 * decreased it, since it would typically just cycle back and forth.
	 * If we don't decrease below buffers in use, we can wait a long
	 * time for an update, until some other context uses PIO buffers.
	 */
	if (!avail && len < cnt)
		cnt = len;
	if (cnt < dd->ipath_pioupd_thresh) {
		dd->ipath_pioupd_thresh = cnt;
		ipath_dbg("Decreased pio update threshold to %u\n",
			dd->ipath_pioupd_thresh);
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
			<< INFINIPATH_S_UPDTHRESH_SHIFT);
		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
			<< INFINIPATH_S_UPDTHRESH_SHIFT;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}
}
/**
 * ipath_create_rcvhdrq - create a receive header queue
 * @dd: the infinipath device
 * @pd: the port data
 *
 * this must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int ipath_create_rcvhdrq(struct ipath_devdata *dd,
			 struct ipath_portdata *pd)
{
	int ret = 0;

	if (!pd->port_rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags = GFP_USER | __GFP_COMP;
		int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
				sizeof(u32), PAGE_SIZE);

		pd->port_rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
			gfp_flags);

		if (!pd->port_rcvhdrq) {
			ipath_dev_err(dd, "attempt to allocate %d bytes "
				      "for port %u rcvhdrq failed\n",
				      amt, pd->port_port);
			ret = -ENOMEM;
			goto bail;
		}

		if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
			pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				GFP_KERNEL);
			if (!pd->port_rcvhdrtail_kvaddr) {
				ipath_dev_err(dd, "attempt to allocate 1 page "
					"for port %u rcvhdrqtailaddr "
					"failed\n", pd->port_port);
				ret = -ENOMEM;
				dma_free_coherent(&dd->pcidev->dev, amt,
					pd->port_rcvhdrq,
					pd->port_rcvhdrq_phys);
				pd->port_rcvhdrq = NULL;
				goto bail;
			}
			pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
			ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
				   "physical\n", pd->port_port,
				   (unsigned long long) phys_hdrqtail);
		}

		pd->port_rcvhdrq_size = amt;

		ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
			   "for port %u rcvhdr Q\n",
			   amt >> PAGE_SHIFT, pd->port_rcvhdrq,
			   (unsigned long) pd->port_rcvhdrq_phys,
			   (unsigned long) pd->port_rcvhdrq_size,
			   pd->port_port);
	} else
		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
			   "hdrtailaddr@%p %llx physical\n",
			   pd->port_port, pd->port_rcvhdrq,
			   (unsigned long long) pd->port_rcvhdrq_phys,
			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
			   pd->port_rcvhdrqtailaddr_phys);

	/* clear for security and sanity on each use */
	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
	if (pd->port_rcvhdrtail_kvaddr)
		memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);

	/*
	 * tell chip each time we init it, even if we are re-using previous
	 * memory (we zero the register at process close)
	 */
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			      pd->port_port, pd->port_rcvhdrqtailaddr_phys);
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
			      pd->port_port, pd->port_rcvhdrq_phys);

bail:
	return ret;
}
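/*
 * Editor's note (illustrative arithmetic, not in the original source):
 * the queue size is rcvhdrcnt entries of rcvhdrentsize 32-bit words,
 * rounded up to a whole page.  With hypothetical values rcvhdrcnt = 64
 * and rcvhdrentsize = 16, amt = ALIGN(64 * 16 * 4, 4096) = 4096, i.e.
 * exactly one page.
 */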
/*
 * Flush all sends that might be in the ready-to-send state, as well as
 * any that are in the process of being sent. Used whenever we need to
 * be sure the send side is idle. Cleans up all buffer state by
 * canceling all pio buffers, and issuing an abort, which cleans up
 * anything in the launch fifo. The cancel is superfluous on some chip
 * versions, but it's safer to always do it.
 * PIOAvail bits are updated by the chip as if a normal send had
 * happened.
 */
void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
{
	unsigned long flags;

	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
		ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
		goto bail;
	}
	/*
	 * If we have SDMA, and it's not disabled, we have to kick off the
	 * abort state machine, provided we aren't already aborting.
	 * If we are in the process of aborting SDMA (!DISABLED, but
	 * ABORTING), we skip the rest of this routine. It is already
	 * "in progress".
	 */
	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
		int skip_cancel;
		unsigned long *statp = &dd->ipath_sdma_status;

		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
		skip_cancel =
			test_and_set_bit(IPATH_SDMA_ABORTING, statp)
			&& !test_bit(IPATH_SDMA_DISABLED, statp);
		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
		if (skip_cancel)
			goto bail;
	}

	ipath_dbg("Cancelling all in-progress send buffers\n");

	/* skip armlaunch errs for a while */
	dd->ipath_lastcancel = jiffies + HZ / 2;

	/*
	 * The abort bit is auto-clearing. We also don't want pioavail
	 * updates happening during this, and we don't want any other
	 * sends going out, so turn those off for the duration. We read
	 * the scratch register to be sure that cancels and the abort
	 * have taken effect in the chip.
	 * Otherwise the two parts are the same as
	 * ipath_force_pio_avail_update().
	 */
	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
				| INFINIPATH_S_PIOENABLE);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 dd->ipath_sendctrl | INFINIPATH_S_ABORT);
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

	/* disarm all send buffers */
	ipath_disarm_piobufs(dd, 0,
			     dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);

	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);

	if (restore_sendctrl) {
		/* else done by caller later if needed */
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
			INFINIPATH_S_PIOENABLE;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		/* and again, be sure all have hit the chip */
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}

	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
		/* only wait so long for intr */
		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
		dd->ipath_sdma_reset_wait = 200;
		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
	}
bail:;
}

/*
 * Force an update of the in-memory copy of the pioavail registers, when
 * needed for any of a variety of reasons. We read the scratch register
 * to make it highly likely that the update will have happened by the
 * time we return. If already off (as in ipath_cancel_sends() above),
 * this routine is a nop, on the assumption that the caller will "do the
 * right thing".
 */
void ipath_force_pio_avail_update(struct ipath_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	}
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}

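/*
 * Illustrative sketch (not from the driver source): the write-then-read
 * idiom used above. MMIO writes may be posted; reading any harmless
 * register back (the scratch register is used because reading it has no
 * side effects) forces the preceding writes to reach the chip first:
 *
 *	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, val);
 *	(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 *
 * After the read returns, the sendctrl update is known to have landed.
 */
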
static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
				int linitcmd)
{
	u64 mod_wd;
	static const char *what[4] = {
		[0] = "NOP",
		[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
	};

	if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
		/*
		 * If we are told to disable, note that so link-recovery
		 * code does not attempt to bring us back up.
		 */
		preempt_disable();
		dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
		preempt_enable();
	} else if (linitcmd) {
		/*
		 * Any other linkinitcmd will lead to LINKDOWN and then
		 * to INIT (if all is well), so clear flag to let
		 * link-recovery code attempt to bring us back up.
		 */
		preempt_disable();
		dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
		preempt_enable();
	}

	mod_wd = (linkcmd << dd->ibcc_lc_shift) |
		(linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
	ipath_cdbg(VERBOSE,
		"Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
		dd->ipath_unit, what[linkcmd], linitcmd,
		ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);

	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
			 dd->ipath_ibcctrl | mod_wd);
	/* read from chip so write is flushed */
	(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
}

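/*
 * Illustrative sketch (not from the driver source): mod_wd above packs
 * two command fields into one kr_ibcctrl write. The link-command shift
 * is chip-dependent and read from dd->ibcc_lc_shift at runtime, so the
 * value 16 below is assumed purely for the example:
 *
 *	mod_wd = (INFINIPATH_IBCC_LINKCMD_ARMED << 16) |
 *		 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
 *
 * OR-ing mod_wd with dd->ipath_ibcctrl preserves the other control
 * bits already configured in the register.
 */
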
int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
{
	u32 lstate;
	int ret;

	switch (newstate) {
	case IPATH_IB_LINKDOWN_ONLY:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKDOWN:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
				    INFINIPATH_IBCC_LINKINITCMD_POLL);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKDOWN_SLEEP:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
				    INFINIPATH_IBCC_LINKINITCMD_SLEEP);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKDOWN_DISABLE:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
				    INFINIPATH_IBCC_LINKINITCMD_DISABLE);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKARM:
		if (dd->ipath_flags & IPATH_LINKARMED) {
			ret = 0;
			goto bail;
		}
		if (!(dd->ipath_flags &
		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
			ret = -EINVAL;
			goto bail;
		}
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);

		/*
		 * Since the port can transition to ACTIVE by receiving
		 * a non-VL15 packet, wait for either state.
		 */
		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
		break;

	case IPATH_IB_LINKACTIVE:
		if (dd->ipath_flags & IPATH_LINKACTIVE) {
			ret = 0;
			goto bail;
		}
		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
			ret = -EINVAL;
			goto bail;
		}
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
		lstate = IPATH_LINKACTIVE;
		break;

	case IPATH_IB_LINK_LOOPBACK:
		dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
				 dd->ipath_ibcctrl);

		/* turn heartbeat off, as it causes loopback to fail */
		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
				       IPATH_IB_HRTBT_OFF);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINK_EXTERNAL:
		dev_info(&dd->pcidev->dev,
			 "Disabling IB local loopback (normal)\n");
		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
				       IPATH_IB_HRTBT_ON);
		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
				 dd->ipath_ibcctrl);
		/* don't wait */
		ret = 0;
		goto bail;

	/*
	 * Heartbeat can be explicitly enabled by the user via the
	 * "hrtbt_enable" "file", and if disabled, trying to enable it
	 * here will have no effect. Implicit changes (heartbeat off when
	 * loopback on, and vice versa) are included to ease testing.
	 */
	case IPATH_IB_LINK_HRTBT:
		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
					     IPATH_IB_HRTBT_ON);
		goto bail;

	case IPATH_IB_LINK_NO_HRTBT:
		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
					     IPATH_IB_HRTBT_OFF);
		goto bail;

	default:
		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
		ret = -EINVAL;
		goto bail;
	}
	ret = ipath_wait_linkstate(dd, lstate, 2000);

bail:
	return ret;
}

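/*
 * Illustrative sketch (not from the driver source): a caller bringing
 * the link up has to respect the IB state-machine ordering enforced
 * above. ARM is only accepted from INIT or ACTIVE, and ACTIVE only
 * from ARMED, so a typical sequence is:
 *
 *	ret = ipath_set_linkstate(dd, IPATH_IB_LINKARM);
 *	if (!ret)
 *		ret = ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
 *
 * Each call waits (up to 2000 ms, via ipath_wait_linkstate()) for the
 * requested state to be reached before returning.
 */
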
/**
 * ipath_set_mtu - set the MTU
 * @dd: the infinipath device
 * @arg: the new MTU
 *
 * We can handle "any" incoming size; the issue here is whether we
 * need to restrict our outgoing size. For now, we don't do any
 * sanity checking on this, and we don't deal with what happens to
 * programs that are already running when the size changes.
 * NOTE: changing the MTU will usually cause the IBC to go back to
 * link INIT state...
 */
int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
{
	u32 piosize;
	int changed = 0;
	int ret;

	/*
	 * mtu is the IB data payload max. It's the largest power of 2
	 * less than piosize (or even larger, since it only really
	 * controls the largest we can receive; we can send the max of
	 * the mtu and piosize). We check that it's one of the valid IB
	 * sizes.
	 */
	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
	    (arg != 4096 || !ipath_mtu4096)) {
		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
		ret = -EINVAL;
		goto bail;
	}
	if (dd->ipath_ibmtu == arg) {
		ret = 0; /* same as current */
		goto bail;
	}

	piosize = dd->ipath_ibmaxlen;
	dd->ipath_ibmtu = arg;

	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
		/* Only if it's not the initial value (or reset to it) */
		if (piosize != dd->ipath_init_ibmaxlen) {
			if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
				piosize = dd->ipath_init_ibmaxlen;
			dd->ipath_ibmaxlen = piosize;
			changed = 1;
		}
	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
		piosize = arg + IPATH_PIO_MAXIBHDR;
		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
			   arg);
		dd->ipath_ibmaxlen = piosize;
		changed = 1;
	}

	if (changed) {
		u64 ibc = dd->ipath_ibcctrl, ibdw;
		/*
		 * update our housekeeping variables, and set IBC max
		 * size, same as init code; max IBC is max we allow in
		 * buffer, less the qword pbc, plus 1 for ICRC, in dwords
		 */
		dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
		ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
			 dd->ibcc_mpl_shift);
		ibc |= ibdw << dd->ibcc_mpl_shift;
		dd->ipath_ibcctrl = ibc;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
				 dd->ipath_ibcctrl);
		dd->ipath_f_tidtemplate(dd);
	}

	ret = 0;

bail:
	return ret;
}

int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
{
	dd->ipath_lid = lid;
	dd->ipath_lmc = lmc;

	dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
			       (~((1U << lmc) - 1)) << 16);

	dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);

	return 0;
}


/**
 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
 * @dd: the infinipath device
 * @regno: the register number to write
 * @port: the port containing the register
 * @value: the value to write
 *
 * Registers that vary with the chip implementation constants (port)
 * use this routine.
 */
void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
			   unsigned port, u64 value)
{
	u16 where;

	if (port < dd->ipath_portcnt &&
	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
		where = regno + port;
	else
		where = -1;

	ipath_write_kreg(dd, where, value);
}

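/*
 * Illustrative arithmetic (not from the driver source): the LIDLMC
 * value built in ipath_set_lid() packs the LID into the low 16 bits
 * and an LMC-derived mask into the upper bits. With an assumed lmc
 * of 2:
 *
 *	~((1U << 2) - 1) = ~0x3
 *
 * so the low lmc bits of the mask are clear, telling the chip to
 * ignore those bits of a packet's DLID when matching it to this port;
 * an LMC of 2 gives the port 4 consecutive LIDs.
 */
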
/*
 * The following deals with the "obviously simple" task of overriding
 * the state of the LEDs, which normally indicate link physical and
 * logical status. The complications arise in dealing with different
 * hardware mappings and the board-dependent routine being called from
 * interrupts. And then there's the requirement to _flash_ them.
 */
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF (8)

static void ipath_run_led_override(unsigned long opaque)
{
	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
	int timeoff;
	int pidx;
	u64 lstate, ltstate, val;

	if (!(dd->ipath_flags & IPATH_INITTED))
		return;

	pidx = dd->ipath_led_override_phase++ & 1;
	dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
	timeoff = dd->ipath_led_override_timeoff;

	/*
	 * below potentially restores the LED values per current status;
	 * it should also possibly set up the traffic-blink register,
	 * but we leave that to per-chip functions.
	 */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
	ltstate = ipath_ib_linktrstate(dd, val);
	lstate = ipath_ib_linkstate(dd, val);

	dd->ipath_f_setextled(dd, lstate, ltstate);
	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
}

void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
{
	int timeoff, freq;

	if (!(dd->ipath_flags & IPATH_INITTED))
		return;

	/* First check if we are blinking. If not, use 1 Hz polling */
	timeoff = HZ;
	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;

	if (freq) {
		/* For blink, set each phase from one nybble of val */
		dd->ipath_led_override_vals[0] = val & 0xF;
		dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
		timeoff = (HZ << 4) / freq;
	} else {
		/* Non-blink: set both phases the same. */
		dd->ipath_led_override_vals[0] = val & 0xF;
		dd->ipath_led_override_vals[1] = val & 0xF;
	}
	dd->ipath_led_override_timeoff = timeoff;

	/*
	 * If the timer has not already been started, do so. Use a "quick"
	 * timeout so the function will be called soon, to look at our
	 * request.
	 */
	if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
		/* Need to start timer */
		init_timer(&dd->ipath_led_override_timer);
		dd->ipath_led_override_timer.function =
			ipath_run_led_override;
		dd->ipath_led_override_timer.data = (unsigned long) dd;
		dd->ipath_led_override_timer.expires = jiffies + 1;
		add_timer(&dd->ipath_led_override_timer);
	} else
		atomic_dec(&dd->ipath_led_override_timer_active);
}

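/*
 * Illustrative arithmetic (not from the driver source): the override
 * value packs two 4-bit LED phases and an 8-bit blink frequency:
 *
 *	bits  3:0  - phase 0 LED bits
 *	bits  7:4  - phase 1 LED bits
 *	bits 15:8  - blink frequency (a freq of f gives a phase period
 *		     of 16/f seconds)
 *
 * With an assumed freq of 8, timeoff = (HZ << 4) / 8 = 2 * HZ, so the
 * timer swaps phases every two seconds. A freq of 0 means no blinking:
 * both phases hold the same value, repolled at 1 Hz.
 */
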
/**
 * ipath_shutdown_device - shut down a device
 * @dd: the infinipath device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled. It does not free any data structures.
 * Everything it does has to be set up again by ipath_init_chip(dd, 1).
 */
void ipath_shutdown_device(struct ipath_devdata *dd)
{
	unsigned long flags;

	ipath_dbg("Shutting down the device\n");

	ipath_hol_up(dd); /* make sure user processes aren't suspended */

	dd->ipath_flags |= IPATH_LINKUNK;
	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
			     IPATH_LINKINIT | IPATH_LINKARMED |
			     IPATH_LINKACTIVE);
	*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
				IPATH_STATUS_IB_READY);

	/* mask interrupts, but not errors */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	dd->ipath_rcvctrl = 0;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
		teardown_sdma(dd);

	/*
	 * gracefully stop all sends, allowing any in progress to
	 * trickle out first.
	 */
	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl = 0;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
	/* flush it */
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

	/*
	 * long enough for anything that's going to trickle out to have
	 * actually done so.
	 */
	udelay(5);

	dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */

	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
	ipath_cancel_sends(dd, 0);

	/*
	 * we are shutting down, so tell components that care. We don't
	 * do this on just a link state change; much like ethernet, a
	 * cable unplug, etc. doesn't change driver state.
	 */
	signal_ib_event(dd, IB_EVENT_PORT_ERR);

	/* disable IBC */
	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control | INFINIPATH_C_FREEZEMODE);

	/*
	 * clear SerdesEnable and turn the LEDs off; do this here because
	 * we are unloading, so don't count on interrupts to move things
	 * along. Turn the LEDs off explicitly for the same reason.
	 */
	dd->ipath_f_quiet_serdes(dd);

	/* stop all the timers that might still be running */
	del_timer_sync(&dd->ipath_hol_timer);
	if (dd->ipath_stats_timer_active) {
		del_timer_sync(&dd->ipath_stats_timer);
		dd->ipath_stats_timer_active = 0;
	}
	if (dd->ipath_intrchk_timer.data) {
		del_timer_sync(&dd->ipath_intrchk_timer);
		dd->ipath_intrchk_timer.data = 0;
	}
	if (atomic_read(&dd->ipath_led_override_timer_active)) {
		del_timer_sync(&dd->ipath_led_override_timer);
		atomic_set(&dd->ipath_led_override_timer_active, 0);
	}

	/*
	 * clear all interrupts and errors, so that the next time the driver
	 * is loaded or device is enabled, we know that whatever is set
	 * happened while we were unloaded
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);

	ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
	ipath_update_eeprom_log(dd);
}

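/*
 * Illustrative arithmetic (not from the driver source): the hwerrclear
 * write above clears every hardware-error bit except MEMBISTFAILED, so
 * the result of the power-on memory BIST survives the shutdown:
 *
 *	~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED
 *
 * i.e. all-ones with the single MEMBISTFAILED bit masked out; writing
 * a 1 to a bit of a clear register clears the corresponding status bit.
 */
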
/**
 * ipath_free_pddata - free a port's allocated data
 * @dd: the infinipath device
 * @pd: the portdata structure
 *
 * free up any allocated data for a port
 * This should not touch anything that would affect a simultaneous
 * re-allocation of port data, because it is called after ipath_mutex
 * is released (and can be called from reinit as well).
 * It should never change any chip state, or global driver state.
 * (The only exception to global state is freeing the port0 port0_skbs.)
 */
void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
{
	if (!pd)
		return;

	if (pd->port_rcvhdrq) {
		ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
			   "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
			   (unsigned long) pd->port_rcvhdrq_size);
		dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
				  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
		pd->port_rcvhdrq = NULL;
		if (pd->port_rcvhdrtail_kvaddr) {
			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
					  pd->port_rcvhdrtail_kvaddr,
					  pd->port_rcvhdrqtailaddr_phys);
			pd->port_rcvhdrtail_kvaddr = NULL;
		}
	}
	if (pd->port_port && pd->port_rcvegrbuf) {
		unsigned e;

		for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
			void *base = pd->port_rcvegrbuf[e];
			size_t size = pd->port_rcvegrbuf_size;

			ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
				   "chunk %u/%u\n", base,
				   (unsigned long) size,
				   e, pd->port_rcvegrbuf_chunks);
			dma_free_coherent(&dd->pcidev->dev, size,
					  base, pd->port_rcvegrbuf_phys[e]);
		}
		kfree(pd->port_rcvegrbuf);
		pd->port_rcvegrbuf = NULL;
		kfree(pd->port_rcvegrbuf_phys);
		pd->port_rcvegrbuf_phys = NULL;
		pd->port_rcvegrbuf_chunks = 0;
	} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
		unsigned e;
		struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;

		dd->ipath_port0_skbinfo = NULL;
		ipath_cdbg(VERBOSE, "free closed port %d "
			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
			   skbinfo);
		for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
			if (skbinfo[e].skb) {
				pci_unmap_single(dd->pcidev, skbinfo[e].phys,
						 dd->ipath_ibmaxlen,
						 PCI_DMA_FROMDEVICE);
				dev_kfree_skb(skbinfo[e].skb);
			}
		vfree(skbinfo);
	}
	kfree(pd->port_tid_pg_list);
	vfree(pd->subport_uregbase);
	vfree(pd->subport_rcvegrbuf);
	vfree(pd->subport_rcvhdr_base);
	kfree(pd);
}

static int __init infinipath_init(void)
{
	int ret;

	if (ipath_debug & __IPATH_DBG)
		printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&unit_table);
	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
		printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
		ret = -ENOMEM;
		goto bail;
	}

	ret = pci_register_driver(&ipath_driver);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Unable to register driver: error %d\n", -ret);
		goto bail_unit;
	}

	ret = ipath_init_ipathfs();
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
		       "ipathfs: error %d\n", -ret);
		goto bail_pci;
	}

	goto bail;

bail_pci:
	pci_unregister_driver(&ipath_driver);

bail_unit:
	idr_destroy(&unit_table);

bail:
	return ret;
}

static void __exit infinipath_cleanup(void)
{
	ipath_exit_ipathfs();

	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
	pci_unregister_driver(&ipath_driver);

	idr_destroy(&unit_table);
}

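/*
 * Illustrative sketch (not from the driver source; setup_a/teardown_a
 * etc. are hypothetical names) of the unwind pattern infinipath_init()
 * uses: each successfully initialized resource gets a label that later
 * failures jump to, so cleanup exactly mirrors setup in reverse order:
 *
 *	if (setup_a())	goto bail;
 *	if (setup_b())	goto bail_a;
 *	if (setup_c())	goto bail_b;
 *	return 0;
 * bail_b:
 *	teardown_b();
 * bail_a:
 *	teardown_a();
 * bail:
 *	return ret;
 *
 * infinipath_cleanup() then tears everything down in the same reverse
 * order (ipathfs, PCI driver, idr table).
 */
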
/**
 * ipath_reset_device - reset the chip if possible
 * @unit: the device to reset
 *
 * Whether or not reset is successful, we attempt to re-initialize the
 * chip (that is, much like a driver unload/reload). We clear the
 * INITTED flag so that the various entry points will fail until we
 * reinitialize. For now, we only allow this if no user ports are open
 * that use chip resources.
 */
int ipath_reset_device(int unit)
{
	int ret, i;
	struct ipath_devdata *dd = ipath_lookup(unit);
	unsigned long flags;

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	if (atomic_read(&dd->ipath_led_override_timer_active)) {
		/* Need to stop LED timer, _then_ shut off LEDs */
		del_timer_sync(&dd->ipath_led_override_timer);
		atomic_set(&dd->ipath_led_override_timer_active, 0);
	}

	/* Shut off LEDs after we are sure timer is not running */
	dd->ipath_led_override = LED_OVER_BOTH_OFF;
	dd->ipath_f_setextled(dd, 0, 0);

	dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);

	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
		dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
			 "not initialized or not present\n", unit);
		ret = -ENXIO;
		goto bail;
	}

	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	if (dd->ipath_pd)
		for (i = 1; i < dd->ipath_cfgports; i++) {
			if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
				continue;
			spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
			ipath_dbg("unit %u port %d is in use "
				  "(PID %u cmd %s), can't reset\n",
				  unit, i,
				  pid_nr(dd->ipath_pd[i]->port_pid),
				  dd->ipath_pd[i]->port_comm);
			ret = -EBUSY;
			goto bail;
		}
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);

	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
		teardown_sdma(dd);

	dd->ipath_flags &= ~IPATH_INITTED;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
	ret = dd->ipath_f_reset(dd);
	if (ret == 1) {
		ipath_dbg("Reinitializing unit %u after reset attempt\n",
			  unit);
		ret = ipath_init_chip(dd, 1);
	} else
		ret = -EAGAIN;
	if (ret)
		ipath_dev_err(dd, "Reinitialize unit %u after "
			      "reset failed with %d\n", unit, ret);
	else
		dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
			 "resetting\n", unit);

bail:
	return ret;
}

/*
 * Send a signal to all the processes that have the driver open through
 * the normal interfaces (i.e., everything other than the diags
 * interface). Returns the number of signaled processes.
 */
static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
	int i, sub, any = 0;
	struct pid *pid;
	unsigned long flags;

	if (!dd->ipath_pd)
		return 0;

	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	for (i = 1; i < dd->ipath_cfgports; i++) {
		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
			continue;
		pid = dd->ipath_pd[i]->port_pid;
		if (!pid)
			continue;

		dev_info(&dd->pcidev->dev, "context %d in use "
			 "(PID %u), sending signal %d\n",
			 i, pid_nr(pid), sig);
		kill_pid(pid, sig, 1);
		any++;
		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
			pid = dd->ipath_pd[i]->port_subpid[sub];
			if (!pid)
				continue;
			dev_info(&dd->pcidev->dev, "sub-context "
				 "%d:%d in use (PID %u), sending "
				 "signal %d\n", i, sub, pid_nr(pid), sig);
			kill_pid(pid, sig, 1);
			any++;
		}
	}
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
	return any;
}

static void ipath_hol_signal_down(struct ipath_devdata *dd)
{
	if (ipath_signal_procs(dd, SIGSTOP))
		ipath_dbg("Stopped some processes\n");
	ipath_cancel_sends(dd, 1);
}


static void ipath_hol_signal_up(struct ipath_devdata *dd)
{
	if (ipath_signal_procs(dd, SIGCONT))
		ipath_dbg("Continued some processes\n");
}

/*
 * The link is down, so stop any user processes and flush pending sends
 * to prevent HoL blocking, then start the HoL timer that alternately
 * continues and stops the processes, so they can detect link down if
 * they want, and do something about it.
 * Timer may already be running, so use mod_timer, not add_timer.
 */
void ipath_hol_down(struct ipath_devdata *dd)
{
	dd->ipath_hol_state = IPATH_HOL_DOWN;
	ipath_hol_signal_down(dd);
	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
	dd->ipath_hol_timer.expires = jiffies +
		msecs_to_jiffies(ipath_hol_timeout_ms);
	mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
}

/*
 * The link is up, so continue any user processes, and ensure the timer
 * is a nop, if running. Let the timer keep running, if set; it will
 * nop when it sees the link is up.
 */
void ipath_hol_up(struct ipath_devdata *dd)
{
	ipath_hol_signal_up(dd);
	dd->ipath_hol_state = IPATH_HOL_UP;
}

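/*
 * Illustrative sketch (not from the driver source): ipath_hol_down()
 * arms the periodic toggle using the hol_timeout_ms module parameter.
 * Converting milliseconds to jiffies and using mod_timer() is safe
 * whether or not the timer is already pending:
 *
 *	dd->ipath_hol_timer.expires = jiffies +
 *		msecs_to_jiffies(ipath_hol_timeout_ms);
 *	mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
 *
 * Each expiry then runs ipath_hol_event(), which flips the processes
 * between SIGSTOP and SIGCONT until the link comes back up.
 */
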
/*
 * Toggle the running/not running state of user processes to prevent
 * HoL blocking on chip resources, but still allow user processes to do
 * link-down special case handling.
 * Should only be called via the timer.
 */
void ipath_hol_event(unsigned long opaque)
{
	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;

	if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
	    && dd->ipath_hol_state != IPATH_HOL_UP) {
		dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
		ipath_dbg("Stopping processes\n");
		ipath_hol_signal_down(dd);
	} else { /* may do "extra" if also in ipath_hol_up() */
		dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
		ipath_dbg("Continuing processes\n");
		ipath_hol_signal_up(dd);
	}
	if (dd->ipath_hol_state == IPATH_HOL_UP)
		ipath_dbg("link's up, don't resched timer\n");
	else {
		dd->ipath_hol_timer.expires = jiffies +
			msecs_to_jiffies(ipath_hol_timeout_ms);
		mod_timer(&dd->ipath_hol_timer,
			  dd->ipath_hol_timer.expires);
	}
}

int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
{
	u64 val;

	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
		return -1;
	if (dd->ipath_rx_pol_inv != new_pol_inv) {
		dd->ipath_rx_pol_inv = new_pol_inv;
		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
			 INFINIPATH_XGXS_RX_POL_SHIFT);
		val |= ((u64)dd->ipath_rx_pol_inv) <<
			INFINIPATH_XGXS_RX_POL_SHIFT;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
	}
	return 0;
}

/*
 * Disable and enable the armlaunch error. Used for PIO bandwidth
 * testing on the 7220, which is count-based, rather than trigger-based.
 * Safe for the driver check, since it's at init. Not completely safe
 * when used for user-mode checking, since some error checking can be
 * lost, but not particularly risky, and it only has problematic
 * side-effects in the face of very buggy user code. There is no
 * reference counting, but that's also fine, given the intended use.
 */
void ipath_enable_armlaunch(struct ipath_devdata *dd)
{
	dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
			 INFINIPATH_E_SPIOARMLAUNCH);
	dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
			 dd->ipath_errormask);
}

void ipath_disable_armlaunch(struct ipath_devdata *dd)
{
	/* so don't re-enable if already set */
	dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
	dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
			 dd->ipath_errormask);
}

module_init(infinipath_init);
module_exit(infinipath_cleanup);
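
/*
 * Illustrative sketch (not from the driver source): the read-modify-
 * write idiom ipath_set_rx_pol_inv() uses to update one field of a
 * 64-bit chip register without disturbing the rest. FIELD_MASK and
 * FIELD_SHIFT stand in for the real INFINIPATH_XGXS_RX_POL_MASK and
 * INFINIPATH_XGXS_RX_POL_SHIFT constants:
 *
 *	val = ipath_read_kreg64(dd, reg);
 *	val &= ~(FIELD_MASK << FIELD_SHIFT);	(clear the old field)
 *	val |= ((u64)newval) << FIELD_SHIFT;	(insert the new value)
 *	ipath_write_kreg(dd, reg, val);
 */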