Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

GenWQE Utility functions

Miscellaneous functionality used in the other GenWQE driver parts.

Signed-off-by: Frank Haverkamp <haver@linux.vnet.ibm.com>
Co-authors: Joerg-Stephan Vogt <jsvogt@de.ibm.com>,
Michael Jung <MIJUNG@de.ibm.com>,
Michael Ruettger <michael@ibmra.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Frank Haverkamp and committed by
Greg Kroah-Hartman
67f4addb eaf4722d

+947
+947
drivers/misc/genwqe/card_utils.c
··· 1 + /** 2 + * IBM Accelerator Family 'GenWQE' 3 + * 4 + * (C) Copyright IBM Corp. 2013 5 + * 6 + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> 7 + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> 8 + * Author: Michael Jung <mijung@de.ibm.com> 9 + * Author: Michael Ruettger <michael@ibmra.de> 10 + * 11 + * This program is free software; you can redistribute it and/or modify 12 + * it under the terms of the GNU General Public License (version 2 only) 13 + * as published by the Free Software Foundation. 14 + * 15 + * This program is distributed in the hope that it will be useful, 16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 + * GNU General Public License for more details. 19 + */ 20 + 21 + /* 22 + * Miscelanous functionality used in the other GenWQE driver parts. 23 + */ 24 + 25 + #include <linux/kernel.h> 26 + #include <linux/dma-mapping.h> 27 + #include <linux/sched.h> 28 + #include <linux/vmalloc.h> 29 + #include <linux/page-flags.h> 30 + #include <linux/scatterlist.h> 31 + #include <linux/hugetlb.h> 32 + #include <linux/iommu.h> 33 + #include <linux/delay.h> 34 + #include <linux/pci.h> 35 + #include <linux/dma-mapping.h> 36 + #include <linux/ctype.h> 37 + #include <linux/module.h> 38 + #include <linux/platform_device.h> 39 + #include <linux/delay.h> 40 + #include <asm/pgtable.h> 41 + 42 + #include "genwqe_driver.h" 43 + #include "card_base.h" 44 + #include "card_ddcb.h" 45 + 46 + /** 47 + * __genwqe_writeq() - Write 64-bit register 48 + * @cd: genwqe device descriptor 49 + * @byte_offs: byte offset within BAR 50 + * @val: 64-bit value 51 + * 52 + * Return: 0 if success; < 0 if error 53 + */ 54 + int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) 55 + { 56 + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 57 + return -EIO; 58 + 59 + if (cd->mmio == NULL) 60 + return -EIO; 61 + 62 + __raw_writeq(cpu_to_be64((val)), (cd->mmio + byte_offs)); 
63 + return 0; 64 + } 65 + 66 + /** 67 + * __genwqe_readq() - Read 64-bit register 68 + * @cd: genwqe device descriptor 69 + * @byte_offs: offset within BAR 70 + * 71 + * Return: value from register 72 + */ 73 + u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) 74 + { 75 + u64 val; 76 + 77 + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 78 + return 0xffffffffffffffffull; 79 + 80 + if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && 81 + (byte_offs == IO_SLC_CFGREG_GFIR)) 82 + return 0x000000000000ffffull; 83 + 84 + if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && 85 + (byte_offs == IO_SLC_CFGREG_GFIR)) 86 + return 0x00000000ffff0000ull; 87 + 88 + if (cd->mmio == NULL) 89 + return 0xffffffffffffffffull; 90 + 91 + val = be64_to_cpu(__raw_readq(cd->mmio + byte_offs)); 92 + return val; 93 + } 94 + 95 + /** 96 + * __genwqe_writel() - Write 32-bit register 97 + * @cd: genwqe device descriptor 98 + * @byte_offs: byte offset within BAR 99 + * @val: 32-bit value 100 + * 101 + * Return: 0 if success; < 0 if error 102 + */ 103 + int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) 104 + { 105 + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 106 + return -EIO; 107 + 108 + if (cd->mmio == NULL) 109 + return -EIO; 110 + 111 + __raw_writel(cpu_to_be32((val)), cd->mmio + byte_offs); 112 + return 0; 113 + } 114 + 115 + /** 116 + * __genwqe_readl() - Read 32-bit register 117 + * @cd: genwqe device descriptor 118 + * @byte_offs: offset within BAR 119 + * 120 + * Return: Value from register 121 + */ 122 + u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) 123 + { 124 + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 125 + return 0xffffffff; 126 + 127 + if (cd->mmio == NULL) 128 + return 0xffffffff; 129 + 130 + return be32_to_cpu(__raw_readl(cd->mmio + byte_offs)); 131 + } 132 + 133 + /** 134 + * genwqe_read_app_id() - Extract app_id 135 + * 136 + * app_unitcfg need to be filled with valid data first 137 + */ 138 + int 
genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) 139 + { 140 + int i, j; 141 + u32 app_id = (u32)cd->app_unitcfg; 142 + 143 + memset(app_name, 0, len); 144 + for (i = 0, j = 0; j < min(len, 4); j++) { 145 + char ch = (char)((app_id >> (24 - j*8)) & 0xff); 146 + if (ch == ' ') 147 + continue; 148 + app_name[i++] = isprint(ch) ? ch : 'X'; 149 + } 150 + return i; 151 + } 152 + 153 + /** 154 + * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations 155 + * 156 + * Existing kernel functions seem to use a different polynom, 157 + * therefore we could not use them here. 158 + * 159 + * Genwqe's Polynomial = 0x20044009 160 + */ 161 + #define CRC32_POLYNOMIAL 0x20044009 162 + static u32 crc32_tab[256]; /* crc32 lookup table */ 163 + 164 + void genwqe_init_crc32(void) 165 + { 166 + int i, j; 167 + u32 crc; 168 + 169 + for (i = 0; i < 256; i++) { 170 + crc = i << 24; 171 + for (j = 0; j < 8; j++) { 172 + if (crc & 0x80000000) 173 + crc = (crc << 1) ^ CRC32_POLYNOMIAL; 174 + else 175 + crc = (crc << 1); 176 + } 177 + crc32_tab[i] = crc; 178 + } 179 + } 180 + 181 + /** 182 + * genwqe_crc32() - Generate 32-bit crc as required for DDCBs 183 + * @buff: pointer to data buffer 184 + * @len: length of data for calculation 185 + * @init: initial crc (0xffffffff at start) 186 + * 187 + * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) 188 + 189 + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should 190 + * result in a crc32 of 0xf33cb7d3. 191 + * 192 + * The existing kernel crc functions did not cover this polynom yet. 193 + * 194 + * Return: crc32 checksum. 
195 + */ 196 + u32 genwqe_crc32(u8 *buff, size_t len, u32 init) 197 + { 198 + int i; 199 + u32 crc; 200 + 201 + crc = init; 202 + while (len--) { 203 + i = ((crc >> 24) ^ *buff++) & 0xFF; 204 + crc = (crc << 8) ^ crc32_tab[i]; 205 + } 206 + return crc; 207 + } 208 + 209 + void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, 210 + dma_addr_t *dma_handle) 211 + { 212 + if (get_order(size) > MAX_ORDER) 213 + return NULL; 214 + 215 + return pci_alloc_consistent(cd->pci_dev, size, dma_handle); 216 + } 217 + 218 + void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, 219 + void *vaddr, dma_addr_t dma_handle) 220 + { 221 + if (vaddr == NULL) 222 + return; 223 + 224 + pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle); 225 + } 226 + 227 + static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, 228 + int num_pages) 229 + { 230 + int i; 231 + struct pci_dev *pci_dev = cd->pci_dev; 232 + 233 + for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { 234 + pci_unmap_page(pci_dev, dma_list[i], 235 + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 236 + dma_list[i] = 0x0; 237 + } 238 + } 239 + 240 + static int genwqe_map_pages(struct genwqe_dev *cd, 241 + struct page **page_list, int num_pages, 242 + dma_addr_t *dma_list) 243 + { 244 + int i; 245 + struct pci_dev *pci_dev = cd->pci_dev; 246 + 247 + /* establish DMA mapping for requested pages */ 248 + for (i = 0; i < num_pages; i++) { 249 + dma_addr_t daddr; 250 + 251 + dma_list[i] = 0x0; 252 + daddr = pci_map_page(pci_dev, page_list[i], 253 + 0, /* map_offs */ 254 + PAGE_SIZE, 255 + PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */ 256 + 257 + if (pci_dma_mapping_error(pci_dev, daddr)) { 258 + dev_err(&pci_dev->dev, 259 + "[%s] err: no dma addr daddr=%016llx!\n", 260 + __func__, (long long)daddr); 261 + goto err; 262 + } 263 + 264 + dma_list[i] = daddr; 265 + } 266 + return 0; 267 + 268 + err: 269 + genwqe_unmap_pages(cd, dma_list, num_pages); 270 + return -EIO; 271 + } 272 + 273 + static int 
genwqe_sgl_size(int num_pages) 274 + { 275 + int len, num_tlb = num_pages / 7; 276 + 277 + len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); 278 + return roundup(len, PAGE_SIZE); 279 + } 280 + 281 + struct sg_entry *genwqe_alloc_sgl(struct genwqe_dev *cd, int num_pages, 282 + dma_addr_t *dma_addr, size_t *sgl_size) 283 + { 284 + struct pci_dev *pci_dev = cd->pci_dev; 285 + struct sg_entry *sgl; 286 + 287 + *sgl_size = genwqe_sgl_size(num_pages); 288 + if (get_order(*sgl_size) > MAX_ORDER) { 289 + dev_err(&pci_dev->dev, 290 + "[%s] err: too much memory requested!\n", __func__); 291 + return NULL; 292 + } 293 + 294 + sgl = __genwqe_alloc_consistent(cd, *sgl_size, dma_addr); 295 + if (sgl == NULL) { 296 + dev_err(&pci_dev->dev, 297 + "[%s] err: no memory available!\n", __func__); 298 + return NULL; 299 + } 300 + 301 + return sgl; 302 + } 303 + 304 + int genwqe_setup_sgl(struct genwqe_dev *cd, 305 + unsigned long offs, 306 + unsigned long size, 307 + struct sg_entry *sgl, 308 + dma_addr_t dma_addr, size_t sgl_size, 309 + dma_addr_t *dma_list, int page_offs, int num_pages) 310 + { 311 + int i = 0, j = 0, p; 312 + unsigned long dma_offs, map_offs; 313 + struct pci_dev *pci_dev = cd->pci_dev; 314 + dma_addr_t prev_daddr = 0; 315 + struct sg_entry *s, *last_s = NULL; 316 + 317 + /* sanity checks */ 318 + if (offs > PAGE_SIZE) { 319 + dev_err(&pci_dev->dev, 320 + "[%s] too large start offs %08lx\n", __func__, offs); 321 + return -EFAULT; 322 + } 323 + if (sgl_size < genwqe_sgl_size(num_pages)) { 324 + dev_err(&pci_dev->dev, 325 + "[%s] sgl_size too small %08lx for %d pages\n", 326 + __func__, sgl_size, num_pages); 327 + return -EFAULT; 328 + } 329 + 330 + dma_offs = 128; /* next block if needed/dma_offset */ 331 + map_offs = offs; /* offset in first page */ 332 + 333 + s = &sgl[0]; /* first set of 8 entries */ 334 + p = 0; /* page */ 335 + while (p < num_pages) { 336 + dma_addr_t daddr; 337 + unsigned int size_to_map; 338 + 339 + /* always write the chaining entry, 
cleanup is done later */ 340 + j = 0; 341 + s[j].target_addr = cpu_to_be64(dma_addr + dma_offs); 342 + s[j].len = cpu_to_be32(128); 343 + s[j].flags = cpu_to_be32(SG_CHAINED); 344 + j++; 345 + 346 + while (j < 8) { 347 + /* DMA mapping for requested page, offs, size */ 348 + size_to_map = min(size, PAGE_SIZE - map_offs); 349 + daddr = dma_list[page_offs + p] + map_offs; 350 + size -= size_to_map; 351 + map_offs = 0; 352 + 353 + if (prev_daddr == daddr) { 354 + u32 prev_len = be32_to_cpu(last_s->len); 355 + 356 + /* pr_info("daddr combining: " 357 + "%016llx/%08x -> %016llx\n", 358 + prev_daddr, prev_len, daddr); */ 359 + 360 + last_s->len = cpu_to_be32(prev_len + 361 + size_to_map); 362 + 363 + p++; /* process next page */ 364 + if (p == num_pages) 365 + goto fixup; /* nothing to do */ 366 + 367 + prev_daddr = daddr + size_to_map; 368 + continue; 369 + } 370 + 371 + /* start new entry */ 372 + s[j].target_addr = cpu_to_be64(daddr); 373 + s[j].len = cpu_to_be32(size_to_map); 374 + s[j].flags = cpu_to_be32(SG_DATA); 375 + prev_daddr = daddr + size_to_map; 376 + last_s = &s[j]; 377 + j++; 378 + 379 + p++; /* process next page */ 380 + if (p == num_pages) 381 + goto fixup; /* nothing to do */ 382 + } 383 + dma_offs += 128; 384 + s += 8; /* continue 8 elements further */ 385 + } 386 + fixup: 387 + if (j == 1) { /* combining happend on last entry! 
*/ 388 + s -= 8; /* full shift needed on previous sgl block */ 389 + j = 7; /* shift all elements */ 390 + } 391 + 392 + for (i = 0; i < j; i++) /* move elements 1 up */ 393 + s[i] = s[i + 1]; 394 + 395 + s[i].target_addr = cpu_to_be64(0); 396 + s[i].len = cpu_to_be32(0); 397 + s[i].flags = cpu_to_be32(SG_END_LIST); 398 + return 0; 399 + } 400 + 401 + void genwqe_free_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list, 402 + dma_addr_t dma_addr, size_t size) 403 + { 404 + __genwqe_free_consistent(cd, size, sg_list, dma_addr); 405 + } 406 + 407 + /** 408 + * free_user_pages() - Give pinned pages back 409 + * 410 + * Documentation of get_user_pages is in mm/memory.c: 411 + * 412 + * If the page is written to, set_page_dirty (or set_page_dirty_lock, 413 + * as appropriate) must be called after the page is finished with, and 414 + * before put_page is called. 415 + * 416 + * FIXME Could be of use to others and might belong in the generic 417 + * code, if others agree. E.g. 418 + * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c 419 + * ceph_put_page_vector in net/ceph/pagevec.c 420 + * maybe more? 421 + */ 422 + static int free_user_pages(struct page **page_list, unsigned int nr_pages, 423 + int dirty) 424 + { 425 + unsigned int i; 426 + 427 + for (i = 0; i < nr_pages; i++) { 428 + if (page_list[i] != NULL) { 429 + if (dirty) 430 + set_page_dirty_lock(page_list[i]); 431 + put_page(page_list[i]); 432 + } 433 + } 434 + return 0; 435 + } 436 + 437 + /** 438 + * genwqe_user_vmap() - Map user-space memory to virtual kernel memory 439 + * @cd: pointer to genwqe device 440 + * @m: mapping params 441 + * @uaddr: user virtual address 442 + * @size: size of memory to be mapped 443 + * 444 + * We need to think about how we could speed this up. Of course it is 445 + * not a good idea to do this over and over again, like we are 446 + * currently doing it. Nevertheless, I am curious where on the path 447 + * the performance is spend. 
Most probably within the memory 448 + * allocation functions, but maybe also in the DMA mapping code. 449 + * 450 + * Restrictions: The maximum size of the possible mapping currently depends 451 + * on the amount of memory we can get using kzalloc() for the 452 + * page_list and pci_alloc_consistent for the sg_list. 453 + * The sg_list is currently itself not scattered, which could 454 + * be fixed with some effort. The page_list must be split into 455 + * PAGE_SIZE chunks too. All that will make the complicated 456 + * code more complicated. 457 + * 458 + * Return: 0 if success 459 + */ 460 + int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, 461 + unsigned long size, struct ddcb_requ *req) 462 + { 463 + int rc = -EINVAL; 464 + unsigned long data, offs; 465 + struct pci_dev *pci_dev = cd->pci_dev; 466 + 467 + if ((uaddr == NULL) || (size == 0)) { 468 + m->size = 0; /* mark unused and not added */ 469 + return -EINVAL; 470 + } 471 + m->u_vaddr = uaddr; 472 + m->size = size; 473 + 474 + /* determine space needed for page_list. */ 475 + data = (unsigned long)uaddr; 476 + offs = offset_in_page(data); 477 + m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); 478 + 479 + m->page_list = kcalloc(m->nr_pages, 480 + sizeof(struct page *) + sizeof(dma_addr_t), 481 + GFP_KERNEL); 482 + if (!m->page_list) { 483 + dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); 484 + m->nr_pages = 0; 485 + m->u_vaddr = NULL; 486 + m->size = 0; /* mark unused and not added */ 487 + return -ENOMEM; 488 + } 489 + m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); 490 + 491 + /* pin user pages in memory */ 492 + rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ 493 + m->nr_pages, 494 + 1, /* write by caller */ 495 + m->page_list); /* ptrs to pages */ 496 + 497 + /* assumption: get_user_pages can be killed by signals. 
*/ 498 + if (rc < m->nr_pages) { 499 + free_user_pages(m->page_list, rc, 0); 500 + rc = -EFAULT; 501 + goto fail_get_user_pages; 502 + } 503 + 504 + rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); 505 + if (rc != 0) 506 + goto fail_free_user_pages; 507 + 508 + return 0; 509 + 510 + fail_free_user_pages: 511 + free_user_pages(m->page_list, m->nr_pages, 0); 512 + 513 + fail_get_user_pages: 514 + kfree(m->page_list); 515 + m->page_list = NULL; 516 + m->dma_list = NULL; 517 + m->nr_pages = 0; 518 + m->u_vaddr = NULL; 519 + m->size = 0; /* mark unused and not added */ 520 + return rc; 521 + } 522 + 523 + /** 524 + * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel 525 + * memory 526 + * @cd: pointer to genwqe device 527 + * @m: mapping params 528 + */ 529 + int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, 530 + struct ddcb_requ *req) 531 + { 532 + struct pci_dev *pci_dev = cd->pci_dev; 533 + 534 + if (!dma_mapping_used(m)) { 535 + dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", 536 + __func__, m); 537 + return -EINVAL; 538 + } 539 + 540 + if (m->dma_list) 541 + genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); 542 + 543 + if (m->page_list) { 544 + free_user_pages(m->page_list, m->nr_pages, 1); 545 + 546 + kfree(m->page_list); 547 + m->page_list = NULL; 548 + m->dma_list = NULL; 549 + m->nr_pages = 0; 550 + } 551 + 552 + m->u_vaddr = NULL; 553 + m->size = 0; /* mark as unused and not added */ 554 + return 0; 555 + } 556 + 557 + /** 558 + * genwqe_card_type() - Get chip type SLU Configuration Register 559 + * @cd: pointer to the genwqe device descriptor 560 + * Return: 0: Altera Stratix-IV 230 561 + * 1: Altera Stratix-IV 530 562 + * 2: Altera Stratix-V A4 563 + * 3: Altera Stratix-V A7 564 + */ 565 + u8 genwqe_card_type(struct genwqe_dev *cd) 566 + { 567 + u64 card_type = cd->slu_unitcfg; 568 + return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); 569 + } 570 + 571 + /** 572 + * 
genwqe_card_reset() - Reset the card 573 + * @cd: pointer to the genwqe device descriptor 574 + */ 575 + int genwqe_card_reset(struct genwqe_dev *cd) 576 + { 577 + u64 softrst; 578 + struct pci_dev *pci_dev = cd->pci_dev; 579 + 580 + if (!genwqe_is_privileged(cd)) 581 + return -ENODEV; 582 + 583 + /* new SL */ 584 + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); 585 + msleep(1000); 586 + __genwqe_readq(cd, IO_HSU_FIR_CLR); 587 + __genwqe_readq(cd, IO_APP_FIR_CLR); 588 + __genwqe_readq(cd, IO_SLU_FIR_CLR); 589 + 590 + /* 591 + * Read-modify-write to preserve the stealth bits 592 + * 593 + * For SL >= 039, Stealth WE bit allows removing 594 + * the read-modify-wrote. 595 + * r-m-w may require a mask 0x3C to avoid hitting hard 596 + * reset again for error reset (should be 0, chicken). 597 + */ 598 + softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; 599 + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); 600 + 601 + /* give ERRORRESET some time to finish */ 602 + msleep(50); 603 + 604 + if (genwqe_need_err_masking(cd)) { 605 + dev_info(&pci_dev->dev, 606 + "[%s] masking errors for old bitstreams\n", __func__); 607 + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); 608 + } 609 + return 0; 610 + } 611 + 612 + int genwqe_read_softreset(struct genwqe_dev *cd) 613 + { 614 + u64 bitstream; 615 + 616 + if (!genwqe_is_privileged(cd)) 617 + return -ENODEV; 618 + 619 + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; 620 + cd->softreset = (bitstream == 0) ? 
0x8ull : 0xcull; 621 + return 0; 622 + } 623 + 624 + /** 625 + * genwqe_set_interrupt_capability() - Configure MSI capability structure 626 + * @cd: pointer to the device 627 + * Return: 0 if no error 628 + */ 629 + int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) 630 + { 631 + int rc; 632 + struct pci_dev *pci_dev = cd->pci_dev; 633 + 634 + rc = pci_enable_msi_block(pci_dev, count); 635 + if (rc == 0) 636 + cd->flags |= GENWQE_FLAG_MSI_ENABLED; 637 + return rc; 638 + } 639 + 640 + /** 641 + * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() 642 + * @cd: pointer to the device 643 + */ 644 + void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) 645 + { 646 + struct pci_dev *pci_dev = cd->pci_dev; 647 + 648 + if (cd->flags & GENWQE_FLAG_MSI_ENABLED) { 649 + pci_disable_msi(pci_dev); 650 + cd->flags &= ~GENWQE_FLAG_MSI_ENABLED; 651 + } 652 + } 653 + 654 + /** 655 + * set_reg_idx() - Fill array with data. Ignore illegal offsets. 656 + * @cd: card device 657 + * @r: debug register array 658 + * @i: index to desired entry 659 + * @m: maximum possible entries 660 + * @addr: addr which is read 661 + * @index: index in debug array 662 + * @val: read value 663 + */ 664 + static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, 665 + unsigned int *i, unsigned int m, u32 addr, u32 idx, 666 + u64 val) 667 + { 668 + if (WARN_ON_ONCE(*i >= m)) 669 + return -EFAULT; 670 + 671 + r[*i].addr = addr; 672 + r[*i].idx = idx; 673 + r[*i].val = val; 674 + ++*i; 675 + return 0; 676 + } 677 + 678 + static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, 679 + unsigned int *i, unsigned int m, u32 addr, u64 val) 680 + { 681 + return set_reg_idx(cd, r, i, m, addr, 0, val); 682 + } 683 + 684 + int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, 685 + unsigned int max_regs, int all) 686 + { 687 + unsigned int i, j, idx = 0; 688 + u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; 689 + u64 gfir, sluid, 
appid, ufir, ufec, sfir, sfec; 690 + 691 + /* Global FIR */ 692 + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 693 + set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); 694 + 695 + /* UnitCfg for SLU */ 696 + sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ 697 + set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); 698 + 699 + /* UnitCfg for APP */ 700 + appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ 701 + set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); 702 + 703 + /* Check all chip Units */ 704 + for (i = 0; i < GENWQE_MAX_UNITS; i++) { 705 + 706 + /* Unit FIR */ 707 + ufir_addr = (i << 24) | 0x008; 708 + ufir = __genwqe_readq(cd, ufir_addr); 709 + set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); 710 + 711 + /* Unit FEC */ 712 + ufec_addr = (i << 24) | 0x018; 713 + ufec = __genwqe_readq(cd, ufec_addr); 714 + set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); 715 + 716 + for (j = 0; j < 64; j++) { 717 + /* wherever there is a primary 1, read the 2ndary */ 718 + if (!all && (!(ufir & (1ull << j)))) 719 + continue; 720 + 721 + sfir_addr = (i << 24) | (0x100 + 8 * j); 722 + sfir = __genwqe_readq(cd, sfir_addr); 723 + set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); 724 + 725 + sfec_addr = (i << 24) | (0x300 + 8 * j); 726 + sfec = __genwqe_readq(cd, sfec_addr); 727 + set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); 728 + } 729 + } 730 + 731 + /* fill with invalid data until end */ 732 + for (i = idx; i < max_regs; i++) { 733 + regs[i].addr = 0xffffffff; 734 + regs[i].val = 0xffffffffffffffffull; 735 + } 736 + return idx; 737 + } 738 + 739 + /** 740 + * genwqe_ffdc_buff_size() - Calculates the number of dump registers 741 + */ 742 + int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) 743 + { 744 + int entries = 0, ring, traps, traces, trace_entries; 745 + u32 eevptr_addr, l_addr, d_len, d_type; 746 + u64 eevptr, val, addr; 747 + 748 + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; 749 + 
eevptr = __genwqe_readq(cd, eevptr_addr); 750 + 751 + if ((eevptr != 0x0) && (eevptr != -1ull)) { 752 + l_addr = GENWQE_UID_OFFS(uid) | eevptr; 753 + 754 + while (1) { 755 + val = __genwqe_readq(cd, l_addr); 756 + 757 + if ((val == 0x0) || (val == -1ull)) 758 + break; 759 + 760 + /* 38:24 */ 761 + d_len = (val & 0x0000007fff000000ull) >> 24; 762 + 763 + /* 39 */ 764 + d_type = (val & 0x0000008000000000ull) >> 36; 765 + 766 + if (d_type) { /* repeat */ 767 + entries += d_len; 768 + } else { /* size in bytes! */ 769 + entries += d_len >> 3; 770 + } 771 + 772 + l_addr += 8; 773 + } 774 + } 775 + 776 + for (ring = 0; ring < 8; ring++) { 777 + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); 778 + val = __genwqe_readq(cd, addr); 779 + 780 + if ((val == 0x0ull) || (val == -1ull)) 781 + continue; 782 + 783 + traps = (val >> 24) & 0xff; 784 + traces = (val >> 16) & 0xff; 785 + trace_entries = val & 0xffff; 786 + 787 + entries += traps + (traces * trace_entries); 788 + } 789 + return entries; 790 + } 791 + 792 + /** 793 + * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure 794 + */ 795 + int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, 796 + struct genwqe_reg *regs, unsigned int max_regs) 797 + { 798 + int i, traps, traces, trace, trace_entries, trace_entry, ring; 799 + unsigned int idx = 0; 800 + u32 eevptr_addr, l_addr, d_addr, d_len, d_type; 801 + u64 eevptr, e, val, addr; 802 + 803 + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; 804 + eevptr = __genwqe_readq(cd, eevptr_addr); 805 + 806 + if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { 807 + l_addr = GENWQE_UID_OFFS(uid) | eevptr; 808 + while (1) { 809 + e = __genwqe_readq(cd, l_addr); 810 + if ((e == 0x0) || (e == 0xffffffffffffffffull)) 811 + break; 812 + 813 + d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ 814 + d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ 815 + d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ 816 + d_addr |= 
GENWQE_UID_OFFS(uid); 817 + 818 + if (d_type) { 819 + for (i = 0; i < (int)d_len; i++) { 820 + val = __genwqe_readq(cd, d_addr); 821 + set_reg_idx(cd, regs, &idx, max_regs, 822 + d_addr, i, val); 823 + } 824 + } else { 825 + d_len >>= 3; /* Size in bytes! */ 826 + for (i = 0; i < (int)d_len; i++, d_addr += 8) { 827 + val = __genwqe_readq(cd, d_addr); 828 + set_reg_idx(cd, regs, &idx, max_regs, 829 + d_addr, 0, val); 830 + } 831 + } 832 + l_addr += 8; 833 + } 834 + } 835 + 836 + /* 837 + * To save time, there are only 6 traces poplulated on Uid=2, 838 + * Ring=1. each with iters=512. 839 + */ 840 + for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, 841 + 2...7 are ASI rings */ 842 + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); 843 + val = __genwqe_readq(cd, addr); 844 + 845 + if ((val == 0x0ull) || (val == -1ull)) 846 + continue; 847 + 848 + traps = (val >> 24) & 0xff; /* Number of Traps */ 849 + traces = (val >> 16) & 0xff; /* Number of Traces */ 850 + trace_entries = val & 0xffff; /* Entries per trace */ 851 + 852 + /* Note: This is a combined loop that dumps both the traps */ 853 + /* (for the trace == 0 case) as well as the traces 1 to */ 854 + /* 'traces'. */ 855 + for (trace = 0; trace <= traces; trace++) { 856 + u32 diag_sel = 857 + GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); 858 + 859 + addr = (GENWQE_UID_OFFS(uid) | 860 + IO_EXTENDED_DIAG_SELECTOR); 861 + __genwqe_writeq(cd, addr, diag_sel); 862 + 863 + for (trace_entry = 0; 864 + trace_entry < (trace ? trace_entries : traps); 865 + trace_entry++) { 866 + addr = (GENWQE_UID_OFFS(uid) | 867 + IO_EXTENDED_DIAG_READ_MBX); 868 + val = __genwqe_readq(cd, addr); 869 + set_reg_idx(cd, regs, &idx, max_regs, addr, 870 + (diag_sel<<16) | trace_entry, val); 871 + } 872 + } 873 + } 874 + return 0; 875 + } 876 + 877 + /** 878 + * genwqe_write_vreg() - Write register in virtual window 879 + * 880 + * Note, these registers are only accessible to the PF through the 881 + * VF-window. 
It is not intended for the VF to access. 882 + */ 883 + int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) 884 + { 885 + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); 886 + __genwqe_writeq(cd, reg, val); 887 + return 0; 888 + } 889 + 890 + /** 891 + * genwqe_read_vreg() - Read register in virtual window 892 + * 893 + * Note, these registers are only accessible to the PF through the 894 + * VF-window. It is not intended for the VF to access. 895 + */ 896 + u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) 897 + { 898 + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); 899 + return __genwqe_readq(cd, reg); 900 + } 901 + 902 + /** 903 + * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card 904 + * 905 + * Note: From a design perspective it turned out to be a bad idea to 906 + * use codes here to specifiy the frequency/speed values. An old 907 + * driver cannot understand new codes and is therefore always a 908 + * problem. Better is to measure out the value or put the 909 + * speed/frequency directly into a register which is always a valid 910 + * value for old as well as for new software. 911 + * 912 + * Return: Card clock in MHz 913 + */ 914 + int genwqe_base_clock_frequency(struct genwqe_dev *cd) 915 + { 916 + u16 speed; /* MHz MHz MHz MHz */ 917 + static const int speed_grade[] = { 250, 200, 166, 175 }; 918 + 919 + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); 920 + if (speed >= ARRAY_SIZE(speed_grade)) 921 + return 0; /* illegal value */ 922 + 923 + return speed_grade[speed]; 924 + } 925 + 926 + /** 927 + * genwqe_stop_traps() - Stop traps 928 + * 929 + * Before reading out the analysis data, we need to stop the traps. 
930 + */ 931 + void genwqe_stop_traps(struct genwqe_dev *cd) 932 + { 933 + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); 934 + } 935 + 936 + /** 937 + * genwqe_start_traps() - Start traps 938 + * 939 + * After having read the data, we can/must enable the traps again. 940 + */ 941 + void genwqe_start_traps(struct genwqe_dev *cd) 942 + { 943 + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); 944 + 945 + if (genwqe_need_err_masking(cd)) 946 + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); 947 + }