Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

staging/hfi1: Enable TID caching feature

This commit "flips the switch" on the TID caching feature
implemented in this patch series.

As well as enabling the new feature by tying the new functions
into the PSM API, it also cleans up the old unneeded code,
data structure members, and variables.

Due to differences in operation and information, the tracing
functions related to expected receives had to be changed. This
patch includes these changes.

The tracing function changes could not be split into a separate
commit without including both tracing variants at the same time.
This would have caused other complications and ugliness.

Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

authored by

Mitko Haralanov and committed by
Doug Ledford
0b091fb3 7e7a436e

+133 -499
+34 -414
drivers/staging/rdma/hfi1/file_ops.c
··· 96 96 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); 97 97 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); 98 98 static int vma_fault(struct vm_area_struct *, struct vm_fault *); 99 - static int exp_tid_setup(struct file *, struct hfi1_tid_info *); 100 - static int exp_tid_free(struct file *, struct hfi1_tid_info *); 101 - static void unlock_exp_tids(struct hfi1_ctxtdata *); 102 99 103 100 static const struct file_operations hfi1_file_ops = { 104 101 .owner = THIS_MODULE, ··· 185 188 struct hfi1_cmd cmd; 186 189 struct hfi1_user_info uinfo; 187 190 struct hfi1_tid_info tinfo; 191 + unsigned long addr; 188 192 ssize_t consumed = 0, copy = 0, ret = 0; 189 193 void *dest = NULL; 190 194 __u64 user_val = 0; ··· 217 219 break; 218 220 case HFI1_CMD_TID_UPDATE: 219 221 case HFI1_CMD_TID_FREE: 222 + case HFI1_CMD_TID_INVAL_READ: 220 223 copy = sizeof(tinfo); 221 224 dest = &tinfo; 222 225 break; ··· 240 241 must_be_root = 1; /* validate user */ 241 242 copy = 0; 242 243 break; 243 - case HFI1_CMD_TID_INVAL_READ: 244 244 default: 245 245 ret = -EINVAL; 246 246 goto bail; ··· 293 295 sc_return_credits(uctxt->sc); 294 296 break; 295 297 case HFI1_CMD_TID_UPDATE: 296 - ret = exp_tid_setup(fp, &tinfo); 298 + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); 297 299 if (!ret) { 298 - unsigned long addr; 299 300 /* 300 301 * Copy the number of tidlist entries we used 301 302 * and the length of the buffer we registered. 
··· 309 312 ret = -EFAULT; 310 313 } 311 314 break; 315 + case HFI1_CMD_TID_INVAL_READ: 316 + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); 317 + if (ret) 318 + break; 319 + addr = (unsigned long)cmd.addr + 320 + offsetof(struct hfi1_tid_info, tidcnt); 321 + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, 322 + sizeof(tinfo.tidcnt))) 323 + ret = -EFAULT; 324 + break; 312 325 case HFI1_CMD_TID_FREE: 313 - ret = exp_tid_free(fp, &tinfo); 326 + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); 327 + if (ret) 328 + break; 329 + addr = (unsigned long)cmd.addr + 330 + offsetof(struct hfi1_tid_info, tidcnt); 331 + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, 332 + sizeof(tinfo.tidcnt))) 333 + ret = -EFAULT; 314 334 break; 315 335 case HFI1_CMD_RECV_CTRL: 316 336 ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); ··· 793 779 uctxt->pionowait = 0; 794 780 uctxt->event_flags = 0; 795 781 796 - hfi1_clear_tids(uctxt); 782 + hfi1_user_exp_rcv_free(fdata); 797 783 hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); 798 - 799 - if (uctxt->tid_pg_list) 800 - unlock_exp_tids(uctxt); 801 784 802 785 hfi1_stats.sps_ctxts--; 803 786 dd->freectxts++; ··· 1118 1107 ret = wait_event_interruptible(uctxt->wait, 1119 1108 !test_bit(HFI1_CTXT_MASTER_UNINIT, 1120 1109 &uctxt->event_flags)); 1121 - goto done; 1110 + goto expected; 1122 1111 } 1123 1112 1124 1113 /* initialize poll variables... */ ··· 1165 1154 clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); 1166 1155 wake_up(&uctxt->wait); 1167 1156 } 1168 - ret = 0; 1169 1157 1158 + expected: 1159 + /* 1160 + * Expected receive has to be setup for all processes (including 1161 + * shared contexts). However, it has to be done after the master 1162 + * context has been fully configured as it depends on the 1163 + * eager/expected split of the RcvArray entries. 1164 + * Setting it up here ensures that the subcontexts will be waiting 1165 + * (due to the above wait_event_interruptible() until the master 1166 + * is setup. 
1167 + */ 1168 + ret = hfi1_user_exp_rcv_init(fp); 1170 1169 done: 1171 1170 return ret; 1172 1171 } ··· 1246 1225 if (ret) 1247 1226 goto done; 1248 1227 } 1249 - /* Setup Expected Rcv memories */ 1250 - uctxt->tid_pg_list = vzalloc(uctxt->expected_count * 1251 - sizeof(struct page **)); 1252 - if (!uctxt->tid_pg_list) { 1253 - ret = -ENOMEM; 1254 - goto done; 1255 - } 1256 - uctxt->physshadow = vzalloc(uctxt->expected_count * 1257 - sizeof(*uctxt->physshadow)); 1258 - if (!uctxt->physshadow) { 1259 - ret = -ENOMEM; 1260 - goto done; 1261 - } 1262 - /* allocate expected TID map and initialize the cursor */ 1263 - atomic_set(&uctxt->tidcursor, 0); 1264 - uctxt->numtidgroups = uctxt->expected_count / 1265 - dd->rcv_entries.group_size; 1266 - uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG + 1267 - !!(uctxt->numtidgroups % BITS_PER_LONG); 1268 - uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt * 1269 - sizeof(*uctxt->tidusemap), 1270 - GFP_KERNEL, uctxt->numa_id); 1271 - if (!uctxt->tidusemap) { 1272 - ret = -ENOMEM; 1273 - goto done; 1274 - } 1275 - /* 1276 - * In case that the number of groups is not a multiple of 1277 - * 64 (the number of groups in a tidusemap element), mark 1278 - * the extra ones as used. This will effectively make them 1279 - * permanently used and should never be assigned. Otherwise, 1280 - * the code which checks how many free groups we have will 1281 - * get completely confused about the state of the bits. 
1282 - */ 1283 - if (uctxt->numtidgroups % BITS_PER_LONG) 1284 - uctxt->tidusemap[uctxt->tidmapcnt - 1] = 1285 - ~((1ULL << (uctxt->numtidgroups % 1286 - BITS_PER_LONG)) - 1); 1287 - trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, 1288 - uctxt->tidusemap, uctxt->tidmapcnt); 1289 1228 } 1290 1229 ret = hfi1_user_sdma_alloc_queues(uctxt, fp); 1291 1230 if (ret) ··· 1482 1501 clear_bit(i, evs); 1483 1502 } 1484 1503 return 0; 1485 - } 1486 - 1487 - #define num_user_pages(vaddr, len) \ 1488 - (1 + (((((unsigned long)(vaddr) + \ 1489 - (unsigned long)(len) - 1) & PAGE_MASK) - \ 1490 - ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) 1491 - 1492 - /** 1493 - * tzcnt - count the number of trailing zeros in a 64bit value 1494 - * @value: the value to be examined 1495 - * 1496 - * Returns the number of trailing least significant zeros in the 1497 - * the input value. If the value is zero, return the number of 1498 - * bits of the value. 1499 - */ 1500 - static inline u8 tzcnt(u64 value) 1501 - { 1502 - return value ? __builtin_ctzl(value) : sizeof(value) * 8; 1503 - } 1504 - 1505 - static inline unsigned num_free_groups(unsigned long map, u16 *start) 1506 - { 1507 - unsigned free; 1508 - u16 bitidx = *start; 1509 - 1510 - if (bitidx >= BITS_PER_LONG) 1511 - return 0; 1512 - /* "Turn off" any bits set before our bit index */ 1513 - map &= ~((1ULL << bitidx) - 1); 1514 - free = tzcnt(map) - bitidx; 1515 - while (!free && bitidx < BITS_PER_LONG) { 1516 - /* Zero out the last set bit so we look at the rest */ 1517 - map &= ~(1ULL << bitidx); 1518 - /* 1519 - * Account for the previously checked bits and advance 1520 - * the bit index. We don't have to check for bitidx 1521 - * getting bigger than BITS_PER_LONG here as it would 1522 - * mean extra instructions that we don't need. If it 1523 - * did happen, it would push free to a negative value 1524 - * which will break the loop. 
1525 - */ 1526 - free = tzcnt(map) - ++bitidx; 1527 - } 1528 - *start = bitidx; 1529 - return free; 1530 - } 1531 - 1532 - static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) 1533 - { 1534 - int ret = 0; 1535 - struct hfi1_filedata *fd = fp->private_data; 1536 - struct hfi1_ctxtdata *uctxt = fd->uctxt; 1537 - struct hfi1_devdata *dd = uctxt->dd; 1538 - unsigned tid, mapped = 0, npages, ngroups, exp_groups, 1539 - tidpairs = uctxt->expected_count / 2; 1540 - struct page **pages; 1541 - unsigned long vaddr, tidmap[uctxt->tidmapcnt]; 1542 - dma_addr_t *phys; 1543 - u32 tidlist[tidpairs], pairidx = 0, tidcursor; 1544 - u16 useidx, idx, bitidx, tidcnt = 0; 1545 - 1546 - vaddr = tinfo->vaddr; 1547 - 1548 - if (offset_in_page(vaddr)) { 1549 - ret = -EINVAL; 1550 - goto bail; 1551 - } 1552 - 1553 - npages = num_user_pages(vaddr, tinfo->length); 1554 - if (!npages) { 1555 - ret = -EINVAL; 1556 - goto bail; 1557 - } 1558 - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, 1559 - npages * PAGE_SIZE)) { 1560 - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", 1561 - (void *)vaddr, npages); 1562 - ret = -EFAULT; 1563 - goto bail; 1564 - } 1565 - 1566 - memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt); 1567 - memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs); 1568 - 1569 - exp_groups = uctxt->expected_count / dd->rcv_entries.group_size; 1570 - /* which group set do we look at first? */ 1571 - tidcursor = atomic_read(&uctxt->tidcursor); 1572 - useidx = (tidcursor >> 16) & 0xffff; 1573 - bitidx = tidcursor & 0xffff; 1574 - 1575 - /* 1576 - * Keep going until we've mapped all pages or we've exhausted all 1577 - * RcvArray entries. 1578 - * This iterates over the number of tidmaps + 1 1579 - * (idx <= uctxt->tidmapcnt) so we check the bitmap which we 1580 - * started from one more time for any free bits before the 1581 - * starting point bit. 
1582 - */ 1583 - for (mapped = 0, idx = 0; 1584 - mapped < npages && idx <= uctxt->tidmapcnt;) { 1585 - u64 i, offset = 0; 1586 - unsigned free, pinned, pmapped = 0, bits_used; 1587 - u16 grp; 1588 - 1589 - /* 1590 - * "Reserve" the needed group bits under lock so other 1591 - * processes can't step in the middle of it. Once 1592 - * reserved, we don't need the lock anymore since we 1593 - * are guaranteed the groups. 1594 - */ 1595 - mutex_lock(&uctxt->exp_lock); 1596 - if (uctxt->tidusemap[useidx] == -1ULL || 1597 - bitidx >= BITS_PER_LONG) { 1598 - /* no free groups in the set, use the next */ 1599 - useidx = (useidx + 1) % uctxt->tidmapcnt; 1600 - idx++; 1601 - bitidx = 0; 1602 - mutex_unlock(&uctxt->exp_lock); 1603 - continue; 1604 - } 1605 - ngroups = ((npages - mapped) / dd->rcv_entries.group_size) + 1606 - !!((npages - mapped) % dd->rcv_entries.group_size); 1607 - 1608 - /* 1609 - * If we've gotten here, the current set of groups does have 1610 - * one or more free groups. 1611 - */ 1612 - free = num_free_groups(uctxt->tidusemap[useidx], &bitidx); 1613 - if (!free) { 1614 - /* 1615 - * Despite the check above, free could still come back 1616 - * as 0 because we don't check the entire bitmap but 1617 - * we start from bitidx. 1618 - */ 1619 - mutex_unlock(&uctxt->exp_lock); 1620 - continue; 1621 - } 1622 - bits_used = min(free, ngroups); 1623 - tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx; 1624 - uctxt->tidusemap[useidx] |= tidmap[useidx]; 1625 - mutex_unlock(&uctxt->exp_lock); 1626 - 1627 - /* 1628 - * At this point, we know where in the map we have free bits. 1629 - * properly offset into the various "shadow" arrays and compute 1630 - * the RcvArray entry index. 
1631 - */ 1632 - offset = ((useidx * BITS_PER_LONG) + bitidx) * 1633 - dd->rcv_entries.group_size; 1634 - pages = uctxt->tid_pg_list + offset; 1635 - phys = uctxt->physshadow + offset; 1636 - tid = uctxt->expected_base + offset; 1637 - 1638 - /* Calculate how many pages we can pin based on free bits */ 1639 - pinned = min((bits_used * dd->rcv_entries.group_size), 1640 - (npages - mapped)); 1641 - /* 1642 - * Now that we know how many free RcvArray entries we have, 1643 - * we can pin that many user pages. 1644 - */ 1645 - ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE), 1646 - pinned, true, pages); 1647 - if (ret) { 1648 - /* 1649 - * We can't continue because the pages array won't be 1650 - * initialized. This should never happen, 1651 - * unless perhaps the user has mpin'ed the pages 1652 - * themselves. 1653 - */ 1654 - dd_dev_info(dd, 1655 - "Failed to lock addr %p, %u pages: errno %d\n", 1656 - (void *) vaddr, pinned, -ret); 1657 - /* 1658 - * Let go of the bits that we reserved since we are not 1659 - * going to use them. 1660 - */ 1661 - mutex_lock(&uctxt->exp_lock); 1662 - uctxt->tidusemap[useidx] &= 1663 - ~(((1ULL << bits_used) - 1) << bitidx); 1664 - mutex_unlock(&uctxt->exp_lock); 1665 - goto done; 1666 - } 1667 - /* 1668 - * How many groups do we need based on how many pages we have 1669 - * pinned? 1670 - */ 1671 - ngroups = (pinned / dd->rcv_entries.group_size) + 1672 - !!(pinned % dd->rcv_entries.group_size); 1673 - /* 1674 - * Keep programming RcvArray entries for all the <ngroups> free 1675 - * groups. 1676 - */ 1677 - for (i = 0, grp = 0; grp < ngroups; i++, grp++) { 1678 - unsigned j; 1679 - u32 pair_size = 0, tidsize; 1680 - /* 1681 - * This inner loop will program an entire group or the 1682 - * array of pinned pages (which ever limit is hit 1683 - * first). 
1684 - */ 1685 - for (j = 0; j < dd->rcv_entries.group_size && 1686 - pmapped < pinned; j++, pmapped++, tid++) { 1687 - tidsize = PAGE_SIZE; 1688 - phys[pmapped] = hfi1_map_page(dd->pcidev, 1689 - pages[pmapped], 0, 1690 - tidsize, PCI_DMA_FROMDEVICE); 1691 - trace_hfi1_exp_rcv_set(uctxt->ctxt, 1692 - fd->subctxt, 1693 - tid, vaddr, 1694 - phys[pmapped], 1695 - pages[pmapped]); 1696 - /* 1697 - * Each RcvArray entry is programmed with one 1698 - * page * worth of memory. This will handle 1699 - * the 8K MTU as well as anything smaller 1700 - * due to the fact that both entries in the 1701 - * RcvTidPair are programmed with a page. 1702 - * PSM currently does not handle anything 1703 - * bigger than 8K MTU, so should we even worry 1704 - * about 10K here? 1705 - */ 1706 - hfi1_put_tid(dd, tid, PT_EXPECTED, 1707 - phys[pmapped], 1708 - ilog2(tidsize >> PAGE_SHIFT) + 1); 1709 - pair_size += tidsize >> PAGE_SHIFT; 1710 - EXP_TID_RESET(tidlist[pairidx], LEN, pair_size); 1711 - if (!(tid % 2)) { 1712 - tidlist[pairidx] |= 1713 - EXP_TID_SET(IDX, 1714 - (tid - uctxt->expected_base) 1715 - / 2); 1716 - tidlist[pairidx] |= 1717 - EXP_TID_SET(CTRL, 1); 1718 - tidcnt++; 1719 - } else { 1720 - tidlist[pairidx] |= 1721 - EXP_TID_SET(CTRL, 2); 1722 - pair_size = 0; 1723 - pairidx++; 1724 - } 1725 - } 1726 - /* 1727 - * We've programmed the entire group (or as much of the 1728 - * group as we'll use. Now, it's time to push it out... 
1729 - */ 1730 - flush_wc(); 1731 - } 1732 - mapped += pinned; 1733 - atomic_set(&uctxt->tidcursor, 1734 - (((useidx & 0xffffff) << 16) | 1735 - ((bitidx + bits_used) & 0xffffff))); 1736 - } 1737 - trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap, 1738 - uctxt->tidmapcnt); 1739 - 1740 - done: 1741 - /* If we've mapped anything, copy relevant info to user */ 1742 - if (mapped) { 1743 - if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, 1744 - tidlist, sizeof(tidlist[0]) * tidcnt)) { 1745 - ret = -EFAULT; 1746 - goto done; 1747 - } 1748 - /* copy TID info to user */ 1749 - if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap, 1750 - tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt)) 1751 - ret = -EFAULT; 1752 - } 1753 - bail: 1754 - /* 1755 - * Calculate mapped length. New Exp TID protocol does not "unwind" and 1756 - * report an error if it can't map the entire buffer. It just reports 1757 - * the length that was mapped. 1758 - */ 1759 - tinfo->length = mapped * PAGE_SIZE; 1760 - tinfo->tidcnt = tidcnt; 1761 - return ret; 1762 - } 1763 - 1764 - static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo) 1765 - { 1766 - struct hfi1_filedata *fd = fp->private_data; 1767 - struct hfi1_ctxtdata *uctxt = fd->uctxt; 1768 - struct hfi1_devdata *dd = uctxt->dd; 1769 - unsigned long tidmap[uctxt->tidmapcnt]; 1770 - struct page **pages; 1771 - dma_addr_t *phys; 1772 - u16 idx, bitidx, tid; 1773 - int ret = 0; 1774 - 1775 - if (copy_from_user(&tidmap, (void __user *)(unsigned long) 1776 - tinfo->tidmap, 1777 - sizeof(tidmap[0]) * uctxt->tidmapcnt)) { 1778 - ret = -EFAULT; 1779 - goto done; 1780 - } 1781 - for (idx = 0; idx < uctxt->tidmapcnt; idx++) { 1782 - unsigned long map; 1783 - 1784 - bitidx = 0; 1785 - if (!tidmap[idx]) 1786 - continue; 1787 - map = tidmap[idx]; 1788 - while ((bitidx = tzcnt(map)) < BITS_PER_LONG) { 1789 - int i, pcount = 0; 1790 - struct page *pshadow[dd->rcv_entries.group_size]; 1791 - unsigned offset = 
((idx * BITS_PER_LONG) + bitidx) * 1792 - dd->rcv_entries.group_size; 1793 - 1794 - pages = uctxt->tid_pg_list + offset; 1795 - phys = uctxt->physshadow + offset; 1796 - tid = uctxt->expected_base + offset; 1797 - for (i = 0; i < dd->rcv_entries.group_size; 1798 - i++, tid++) { 1799 - if (pages[i]) { 1800 - hfi1_put_tid(dd, tid, PT_INVALID, 1801 - 0, 0); 1802 - trace_hfi1_exp_rcv_free(uctxt->ctxt, 1803 - fd->subctxt, 1804 - tid, phys[i], 1805 - pages[i]); 1806 - pci_unmap_page(dd->pcidev, phys[i], 1807 - PAGE_SIZE, PCI_DMA_FROMDEVICE); 1808 - pshadow[pcount] = pages[i]; 1809 - pages[i] = NULL; 1810 - pcount++; 1811 - phys[i] = 0; 1812 - } 1813 - } 1814 - flush_wc(); 1815 - hfi1_release_user_pages(pshadow, pcount, true); 1816 - clear_bit(bitidx, &uctxt->tidusemap[idx]); 1817 - map &= ~(1ULL<<bitidx); 1818 - } 1819 - } 1820 - trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap, 1821 - uctxt->tidmapcnt); 1822 - done: 1823 - return ret; 1824 - } 1825 - 1826 - static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt) 1827 - { 1828 - struct hfi1_devdata *dd = uctxt->dd; 1829 - unsigned tid; 1830 - 1831 - dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n", 1832 - uctxt->ctxt); 1833 - for (tid = 0; tid < uctxt->expected_count; tid++) { 1834 - struct page *p = uctxt->tid_pg_list[tid]; 1835 - dma_addr_t phys; 1836 - 1837 - if (!p) 1838 - continue; 1839 - 1840 - phys = uctxt->physshadow[tid]; 1841 - uctxt->physshadow[tid] = 0; 1842 - uctxt->tid_pg_list[tid] = NULL; 1843 - pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE); 1844 - hfi1_release_user_pages(&p, 1, true); 1845 - } 1846 1504 } 1847 1505 1848 1506 static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
-14
drivers/staging/rdma/hfi1/hfi.h
··· 240 240 u32 expected_count; 241 241 /* index of first expected TID entry. */ 242 242 u32 expected_base; 243 - /* cursor into the exp group sets */ 244 - atomic_t tidcursor; 245 - /* number of exp TID groups assigned to the ctxt */ 246 - u16 numtidgroups; 247 - /* size of exp TID group fields in tidusemap */ 248 - u16 tidmapcnt; 249 - /* exp TID group usage bitfield array */ 250 - unsigned long *tidusemap; 251 - /* pinned pages for exp sends, allocated at open */ 252 - struct page **tid_pg_list; 253 - /* dma handles for exp tid pages */ 254 - dma_addr_t *physshadow; 255 243 256 244 struct exp_tid_set tid_group_list; 257 245 struct exp_tid_set tid_used_list; ··· 1648 1660 enum platform_config_table_type_encoding table_type, 1649 1661 int table_index, int field_index, u32 *data, u32 len); 1650 1662 1651 - dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long, 1652 - size_t, int); 1653 1663 const char *get_unit_name(int unit); 1654 1664 1655 1665 /*
-3
drivers/staging/rdma/hfi1/init.c
··· 962 962 kfree(rcd->egrbufs.buffers); 963 963 964 964 sc_free(rcd->sc); 965 - vfree(rcd->physshadow); 966 - vfree(rcd->tid_pg_list); 967 965 vfree(rcd->user_event_mask); 968 966 vfree(rcd->subctxt_uregbase); 969 967 vfree(rcd->subctxt_rcvegrbuf); 970 968 vfree(rcd->subctxt_rcvhdr_base); 971 - kfree(rcd->tidusemap); 972 969 kfree(rcd->opstats); 973 970 kfree(rcd); 974 971 }
+86 -48
drivers/staging/rdma/hfi1/trace.h
··· 153 153 ) 154 154 ); 155 155 156 - const char *print_u64_array(struct trace_seq *, u64 *, int); 157 - 158 - TRACE_EVENT(hfi1_exp_tid_map, 159 - TP_PROTO(unsigned ctxt, u16 subctxt, int dir, 160 - unsigned long *maps, u16 count), 161 - TP_ARGS(ctxt, subctxt, dir, maps, count), 156 + TRACE_EVENT(hfi1_exp_tid_reg, 157 + TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, 158 + u32 npages, unsigned long va, unsigned long pa, 159 + dma_addr_t dma), 160 + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), 162 161 TP_STRUCT__entry( 163 162 __field(unsigned, ctxt) 164 163 __field(u16, subctxt) 165 - __field(int, dir) 166 - __field(u16, count) 167 - __dynamic_array(unsigned long, maps, sizeof(*maps) * count) 164 + __field(u32, rarr) 165 + __field(u32, npages) 166 + __field(unsigned long, va) 167 + __field(unsigned long, pa) 168 + __field(dma_addr_t, dma) 168 169 ), 169 170 TP_fast_assign( 170 171 __entry->ctxt = ctxt; 171 172 __entry->subctxt = subctxt; 172 - __entry->dir = dir; 173 - __entry->count = count; 174 - memcpy(__get_dynamic_array(maps), maps, 175 - sizeof(*maps) * count); 173 + __entry->rarr = rarr; 174 + __entry->npages = npages; 175 + __entry->va = va; 176 + __entry->pa = pa; 177 + __entry->dma = dma; 176 178 ), 177 - TP_printk("[%3u:%02u] %s tidmaps %s", 179 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 178 180 __entry->ctxt, 179 181 __entry->subctxt, 180 - (__entry->dir ? 
">" : "<"), 181 - print_u64_array(p, __get_dynamic_array(maps), 182 - __entry->count) 182 + __entry->rarr, 183 + __entry->npages, 184 + __entry->pa, 185 + __entry->va, 186 + __entry->dma 183 187 ) 184 188 ); 185 189 186 - TRACE_EVENT(hfi1_exp_rcv_set, 187 - TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid, 188 - unsigned long vaddr, u64 phys_addr, void *page), 189 - TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page), 190 + TRACE_EVENT(hfi1_exp_tid_unreg, 191 + TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages, 192 + unsigned long va, unsigned long pa, dma_addr_t dma), 193 + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), 190 194 TP_STRUCT__entry( 191 195 __field(unsigned, ctxt) 192 196 __field(u16, subctxt) 193 - __field(u32, tid) 194 - __field(unsigned long, vaddr) 195 - __field(u64, phys_addr) 196 - __field(void *, page) 197 + __field(u32, rarr) 198 + __field(u32, npages) 199 + __field(unsigned long, va) 200 + __field(unsigned long, pa) 201 + __field(dma_addr_t, dma) 197 202 ), 198 203 TP_fast_assign( 199 204 __entry->ctxt = ctxt; 200 205 __entry->subctxt = subctxt; 201 - __entry->tid = tid; 202 - __entry->vaddr = vaddr; 203 - __entry->phys_addr = phys_addr; 204 - __entry->page = page; 206 + __entry->rarr = rarr; 207 + __entry->npages = npages; 208 + __entry->va = va; 209 + __entry->pa = pa; 210 + __entry->dma = dma; 205 211 ), 206 - TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p", 212 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 207 213 __entry->ctxt, 208 214 __entry->subctxt, 209 - __entry->tid, 210 - __entry->vaddr, 211 - __entry->phys_addr, 212 - __entry->page 215 + __entry->rarr, 216 + __entry->npages, 217 + __entry->pa, 218 + __entry->va, 219 + __entry->dma 213 220 ) 214 221 ); 215 222 216 - TRACE_EVENT(hfi1_exp_rcv_free, 217 - TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid, 218 - unsigned long phys, void *page), 219 - TP_ARGS(ctxt, subctxt, tid, phys, page), 223 + TRACE_EVENT(hfi1_exp_tid_inval, 224 + 
TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr, 225 + u32 npages, dma_addr_t dma), 226 + TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), 220 227 TP_STRUCT__entry( 221 228 __field(unsigned, ctxt) 222 229 __field(u16, subctxt) 223 - __field(u32, tid) 224 - __field(unsigned long, phys) 225 - __field(void *, page) 230 + __field(unsigned long, va) 231 + __field(u32, rarr) 232 + __field(u32, npages) 233 + __field(dma_addr_t, dma) 226 234 ), 227 235 TP_fast_assign( 228 236 __entry->ctxt = ctxt; 229 237 __entry->subctxt = subctxt; 230 - __entry->tid = tid; 231 - __entry->phys = phys; 232 - __entry->page = page; 238 + __entry->va = va; 239 + __entry->rarr = rarr; 240 + __entry->npages = npages; 241 + __entry->dma = dma; 233 242 ), 234 - TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p", 243 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", 235 244 __entry->ctxt, 236 245 __entry->subctxt, 237 - __entry->tid, 238 - __entry->phys, 239 - __entry->page 246 + __entry->rarr, 247 + __entry->npages, 248 + __entry->va, 249 + __entry->dma 240 250 ) 241 251 ); 252 + 253 + TRACE_EVENT(hfi1_mmu_invalidate, 254 + TP_PROTO(unsigned ctxt, u16 subctxt, const char *type, 255 + unsigned long start, unsigned long end), 256 + TP_ARGS(ctxt, subctxt, type, start, end), 257 + TP_STRUCT__entry( 258 + __field(unsigned, ctxt) 259 + __field(u16, subctxt) 260 + __string(type, type) 261 + __field(unsigned long, start) 262 + __field(unsigned long, end) 263 + ), 264 + TP_fast_assign( 265 + __entry->ctxt = ctxt; 266 + __entry->subctxt = subctxt; 267 + __assign_str(type, type); 268 + __entry->start = start; 269 + __entry->end = end; 270 + ), 271 + TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", 272 + __entry->ctxt, 273 + __entry->subctxt, 274 + __get_str(type), 275 + __entry->start, 276 + __entry->end 277 + ) 278 + ); 279 + 242 280 #undef TRACE_SYSTEM 243 281 #define TRACE_SYSTEM hfi1_tx 244 282
+12
drivers/staging/rdma/hfi1/user_exp_rcv.c
··· 902 902 return -EFAULT; 903 903 } 904 904 hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); 905 + trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, 906 + npages, node->virt, node->phys, phys); 905 907 return 0; 906 908 } 907 909 ··· 948 946 { 949 947 struct hfi1_ctxtdata *uctxt = fd->uctxt; 950 948 struct hfi1_devdata *dd = uctxt->dd; 949 + 950 + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, 951 + node->npages, node->virt, node->phys, 952 + node->dma_addr); 951 953 952 954 hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); 953 955 /* ··· 1029 1023 struct mmu_rb_node *node; 1030 1024 unsigned long addr = start; 1031 1025 1026 + trace_hfi1_mmu_invalidate(uctxt->ctxt, fd->subctxt, mmu_types[type], 1027 + start, end); 1028 + 1032 1029 spin_lock(&fd->rb_lock); 1033 1030 while (addr < end) { 1034 1031 node = mmu_rb_search_by_addr(root, addr); ··· 1058 1049 if (node->freed) 1059 1050 continue; 1060 1051 1052 + trace_hfi1_exp_tid_inval(uctxt->ctxt, fd->subctxt, node->virt, 1053 + node->rcventry, node->npages, 1054 + node->dma_addr); 1061 1055 node->freed = true; 1062 1056 1063 1057 spin_lock(&fd->invalid_lock);
-14
drivers/staging/rdma/hfi1/user_pages.c
··· 54 54 55 55 #include "hfi.h" 56 56 57 - /** 58 - * hfi1_map_page - a safety wrapper around pci_map_page() 59 - * 60 - */ 61 - dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page, 62 - unsigned long offset, size_t size, int direction) 63 - { 64 - dma_addr_t phys; 65 - 66 - phys = pci_map_page(hwdev, page, offset, size, direction); 67 - 68 - return phys; 69 - } 70 - 71 57 int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, 72 58 struct page **pages) 73 59 {
+1 -6
include/uapi/rdma/hfi/hfi1_user.h
··· 66 66 * The major version changes when data structures change in an incompatible 67 67 * way. The driver must be the same for initialization to succeed. 68 68 */ 69 - #define HFI1_USER_SWMAJOR 4 69 + #define HFI1_USER_SWMAJOR 5 70 70 71 71 /* 72 72 * Minor version differences are always compatible ··· 241 241 __u32 tidcnt; 242 242 /* length of transfer buffer programmed by this request */ 243 243 __u32 length; 244 - /* 245 - * pointer to bitmap of TIDs used for this call; 246 - * checked for being large enough at open 247 - */ 248 - __u64 tidmap; 249 244 }; 250 245 251 246 struct hfi1_cmd {