Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add aldebaran trap handler support

Similar to arcturus, but ARCH/ACC VGPRs may now be split unevenly.
A new field in SQ_WAVE_GPR_ALLOC tracks the boundary between the two
sets of VGPRs.

Squashed the following patches:

drm/amdkfd: Use preprocessor for IP-specific trap handler code
drm/amdkfd: Fix VGPR restore race in gfx8/gfx9 trap handler
drm/amdkfd: Remove duplicated code in gfx9 trap handler
drm/amdkfd: Separate ARCH/ACC VGPR restore in trap handler
drm/amdkfd: Reverse order of ARCH/ACC VGPR restore in trap handler

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jay Cornwall and committed by Alex Deucher
0ef6845c 5af81c6e

+621 -86
+491
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
··· 1575 1575 0xbf810000, 0x00000000, 1576 1576 }; 1577 1577 1578 + static const uint32_t cwsr_trap_aldebaran_hex[] = { 1579 + 0xbf820001, 0xbf8202cd, 1580 + 0xb8f8f802, 0x89788678, 1581 + 0xb8eef801, 0x866eff6e, 1582 + 0x00000800, 0xbf840003, 1583 + 0x866eff78, 0x00002000, 1584 + 0xbf840016, 0xb8fbf803, 1585 + 0x866eff7b, 0x00000400, 1586 + 0xbf85003b, 0x866eff7b, 1587 + 0x00000800, 0xbf850003, 1588 + 0x866eff7b, 0x00000100, 1589 + 0xbf84000c, 0x866eff78, 1590 + 0x00002000, 0xbf840005, 1591 + 0xbf8e0010, 0xb8eef803, 1592 + 0x866eff6e, 0x00000400, 1593 + 0xbf84fffb, 0x8778ff78, 1594 + 0x00002000, 0x80ec886c, 1595 + 0x82ed806d, 0xb8eef807, 1596 + 0x866fff6e, 0x001f8000, 1597 + 0x8e6f8b6f, 0x8977ff77, 1598 + 0xfc000000, 0x87776f77, 1599 + 0x896eff6e, 0x001f8000, 1600 + 0xb96ef807, 0xb8faf812, 1601 + 0xb8fbf813, 0x8efa887a, 1602 + 0xc0071bbd, 0x00000000, 1603 + 0xbf8cc07f, 0xc0071ebd, 1604 + 0x00000008, 0xbf8cc07f, 1605 + 0x86ee6e6e, 0xbf840001, 1606 + 0xbe801d6e, 0xb8fbf803, 1607 + 0x867bff7b, 0x000001ff, 1608 + 0xbf850002, 0x806c846c, 1609 + 0x826d806d, 0x866dff6d, 1610 + 0x0000ffff, 0x8f6e8b77, 1611 + 0x866eff6e, 0x001f8000, 1612 + 0xb96ef807, 0x86fe7e7e, 1613 + 0x86ea6a6a, 0x8f6e8378, 1614 + 0xb96ee0c2, 0xbf800002, 1615 + 0xb9780002, 0xbe801f6c, 1616 + 0x866dff6d, 0x0000ffff, 1617 + 0xbefa0080, 0xb97a0283, 1618 + 0xb8fa2407, 0x8e7a9b7a, 1619 + 0x876d7a6d, 0xb8fa03c7, 1620 + 0x8e7a9a7a, 0x876d7a6d, 1621 + 0xb8faf807, 0x867aff7a, 1622 + 0x00007fff, 0xb97af807, 1623 + 0xbeee007e, 0xbeef007f, 1624 + 0xbefe0180, 0xbf900004, 1625 + 0x877a8478, 0xb97af802, 1626 + 0xbf8e0002, 0xbf88fffe, 1627 + 0xb8fa2985, 0x807a817a, 1628 + 0x8e7a8a7a, 0x8e7a817a, 1629 + 0xb8fb1605, 0x807b817b, 1630 + 0x8e7b867b, 0x807a7b7a, 1631 + 0x807a7e7a, 0x827b807f, 1632 + 0x867bff7b, 0x0000ffff, 1633 + 0xc04b1c3d, 0x00000050, 1634 + 0xbf8cc07f, 0xc04b1d3d, 1635 + 0x00000060, 0xbf8cc07f, 1636 + 0xc0431e7d, 0x00000074, 1637 + 0xbf8cc07f, 0xbef4007e, 1638 + 0x8675ff7f, 0x0000ffff, 1639 + 0x8775ff75, 
0x00040000, 1640 + 0xbef60080, 0xbef700ff, 1641 + 0x00807fac, 0x867aff7f, 1642 + 0x08000000, 0x8f7a837a, 1643 + 0x87777a77, 0x867aff7f, 1644 + 0x70000000, 0x8f7a817a, 1645 + 0x87777a77, 0xbef1007c, 1646 + 0xbef00080, 0xb8f02985, 1647 + 0x80708170, 0x8e708a70, 1648 + 0x8e708170, 0xb8fa1605, 1649 + 0x807a817a, 0x8e7a867a, 1650 + 0x80707a70, 0xbef60084, 1651 + 0xbef600ff, 0x01000000, 1652 + 0xbefe007c, 0xbefc0070, 1653 + 0xc0611c7a, 0x0000007c, 1654 + 0xbf8cc07f, 0x80708470, 1655 + 0xbefc007e, 0xbefe007c, 1656 + 0xbefc0070, 0xc0611b3a, 1657 + 0x0000007c, 0xbf8cc07f, 1658 + 0x80708470, 0xbefc007e, 1659 + 0xbefe007c, 0xbefc0070, 1660 + 0xc0611b7a, 0x0000007c, 1661 + 0xbf8cc07f, 0x80708470, 1662 + 0xbefc007e, 0xbefe007c, 1663 + 0xbefc0070, 0xc0611bba, 1664 + 0x0000007c, 0xbf8cc07f, 1665 + 0x80708470, 0xbefc007e, 1666 + 0xbefe007c, 0xbefc0070, 1667 + 0xc0611bfa, 0x0000007c, 1668 + 0xbf8cc07f, 0x80708470, 1669 + 0xbefc007e, 0xbefe007c, 1670 + 0xbefc0070, 0xc0611e3a, 1671 + 0x0000007c, 0xbf8cc07f, 1672 + 0x80708470, 0xbefc007e, 1673 + 0xb8fbf803, 0xbefe007c, 1674 + 0xbefc0070, 0xc0611efa, 1675 + 0x0000007c, 0xbf8cc07f, 1676 + 0x80708470, 0xbefc007e, 1677 + 0xbefe007c, 0xbefc0070, 1678 + 0xc0611a3a, 0x0000007c, 1679 + 0xbf8cc07f, 0x80708470, 1680 + 0xbefc007e, 0xbefe007c, 1681 + 0xbefc0070, 0xc0611a7a, 1682 + 0x0000007c, 0xbf8cc07f, 1683 + 0x80708470, 0xbefc007e, 1684 + 0xb8f1f801, 0xbefe007c, 1685 + 0xbefc0070, 0xc0611c7a, 1686 + 0x0000007c, 0xbf8cc07f, 1687 + 0x80708470, 0xbefc007e, 1688 + 0x867aff7f, 0x04000000, 1689 + 0xbeef0080, 0x876f6f7a, 1690 + 0xb8f02985, 0x80708170, 1691 + 0x8e708a70, 0x8e708170, 1692 + 0xb8fb1605, 0x807b817b, 1693 + 0x8e7b847b, 0x8e76827b, 1694 + 0xbef600ff, 0x01000000, 1695 + 0xbef20174, 0x80747074, 1696 + 0x82758075, 0xbefc0080, 1697 + 0xbf800000, 0xbe802b00, 1698 + 0xbe822b02, 0xbe842b04, 1699 + 0xbe862b06, 0xbe882b08, 1700 + 0xbe8a2b0a, 0xbe8c2b0c, 1701 + 0xbe8e2b0e, 0xc06b003a, 1702 + 0x00000000, 0xbf8cc07f, 1703 + 0xc06b013a, 0x00000010, 
1704 + 0xbf8cc07f, 0xc06b023a, 1705 + 0x00000020, 0xbf8cc07f, 1706 + 0xc06b033a, 0x00000030, 1707 + 0xbf8cc07f, 0x8074c074, 1708 + 0x82758075, 0x807c907c, 1709 + 0xbf0a7b7c, 0xbf85ffe7, 1710 + 0xbef40172, 0xbef00080, 1711 + 0xbefe00c1, 0xbeff00c1, 1712 + 0xbee80080, 0xbee90080, 1713 + 0xbef600ff, 0x01000000, 1714 + 0x867aff78, 0x00400000, 1715 + 0xbf850003, 0xb8faf803, 1716 + 0x897a7aff, 0x10000000, 1717 + 0xbf85004d, 0xbe840080, 1718 + 0xd2890000, 0x00000900, 1719 + 0x80048104, 0xd2890001, 1720 + 0x00000900, 0x80048104, 1721 + 0xd2890002, 0x00000900, 1722 + 0x80048104, 0xd2890003, 1723 + 0x00000900, 0x80048104, 1724 + 0xc069003a, 0x00000070, 1725 + 0xbf8cc07f, 0x80709070, 1726 + 0xbf06c004, 0xbf84ffee, 1727 + 0xbe840080, 0xd2890000, 1728 + 0x00000901, 0x80048104, 1729 + 0xd2890001, 0x00000901, 1730 + 0x80048104, 0xd2890002, 1731 + 0x00000901, 0x80048104, 1732 + 0xd2890003, 0x00000901, 1733 + 0x80048104, 0xc069003a, 1734 + 0x00000070, 0xbf8cc07f, 1735 + 0x80709070, 0xbf06c004, 1736 + 0xbf84ffee, 0xbe840080, 1737 + 0xd2890000, 0x00000902, 1738 + 0x80048104, 0xd2890001, 1739 + 0x00000902, 0x80048104, 1740 + 0xd2890002, 0x00000902, 1741 + 0x80048104, 0xd2890003, 1742 + 0x00000902, 0x80048104, 1743 + 0xc069003a, 0x00000070, 1744 + 0xbf8cc07f, 0x80709070, 1745 + 0xbf06c004, 0xbf84ffee, 1746 + 0xbe840080, 0xd2890000, 1747 + 0x00000903, 0x80048104, 1748 + 0xd2890001, 0x00000903, 1749 + 0x80048104, 0xd2890002, 1750 + 0x00000903, 0x80048104, 1751 + 0xd2890003, 0x00000903, 1752 + 0x80048104, 0xc069003a, 1753 + 0x00000070, 0xbf8cc07f, 1754 + 0x80709070, 0xbf06c004, 1755 + 0xbf84ffee, 0xbf820008, 1756 + 0xe0724000, 0x701d0000, 1757 + 0xe0724100, 0x701d0100, 1758 + 0xe0724200, 0x701d0200, 1759 + 0xe0724300, 0x701d0300, 1760 + 0xbefe00c1, 0xbeff00c1, 1761 + 0xb8fb4306, 0x867bc17b, 1762 + 0xbf840064, 0xbf8a0000, 1763 + 0x867aff6f, 0x04000000, 1764 + 0xbf840060, 0x8e7b867b, 1765 + 0x8e7b827b, 0xbef6007b, 1766 + 0xb8f02985, 0x80708170, 1767 + 0x8e708a70, 0x8e708170, 1768 + 
0xb8fa1605, 0x807a817a, 1769 + 0x8e7a867a, 0x80707a70, 1770 + 0x8070ff70, 0x00000080, 1771 + 0xbef600ff, 0x01000000, 1772 + 0xbefc0080, 0xd28c0002, 1773 + 0x000100c1, 0xd28d0003, 1774 + 0x000204c1, 0x867aff78, 1775 + 0x00400000, 0xbf850003, 1776 + 0xb8faf803, 0x897a7aff, 1777 + 0x10000000, 0xbf850030, 1778 + 0x24040682, 0xd86e4000, 1779 + 0x00000002, 0xbf8cc07f, 1780 + 0xbe840080, 0xd2890000, 1781 + 0x00000900, 0x80048104, 1782 + 0xd2890001, 0x00000900, 1783 + 0x80048104, 0xd2890002, 1784 + 0x00000900, 0x80048104, 1785 + 0xd2890003, 0x00000900, 1786 + 0x80048104, 0xc069003a, 1787 + 0x00000070, 0xbf8cc07f, 1788 + 0x80709070, 0xbf06c004, 1789 + 0xbf84ffee, 0xbe840080, 1790 + 0xd2890000, 0x00000901, 1791 + 0x80048104, 0xd2890001, 1792 + 0x00000901, 0x80048104, 1793 + 0xd2890002, 0x00000901, 1794 + 0x80048104, 0xd2890003, 1795 + 0x00000901, 0x80048104, 1796 + 0xc069003a, 0x00000070, 1797 + 0xbf8cc07f, 0x80709070, 1798 + 0xbf06c004, 0xbf84ffee, 1799 + 0x680404ff, 0x00000200, 1800 + 0xd0c9006a, 0x0000f702, 1801 + 0xbf87ffd2, 0xbf820015, 1802 + 0xd1060002, 0x00011103, 1803 + 0x7e0602ff, 0x00000200, 1804 + 0xbefc00ff, 0x00010000, 1805 + 0xbe800077, 0x8677ff77, 1806 + 0xff7fffff, 0x8777ff77, 1807 + 0x00058000, 0xd8ec0000, 1808 + 0x00000002, 0xbf8cc07f, 1809 + 0xe0765000, 0x701d0002, 1810 + 0x68040702, 0xd0c9006a, 1811 + 0x0000f702, 0xbf87fff7, 1812 + 0xbef70000, 0xbef000ff, 1813 + 0x00000400, 0xbefe00c1, 1814 + 0xbeff00c1, 0xb8fb2b05, 1815 + 0x807b817b, 0x8e7b827b, 1816 + 0xbef600ff, 0x01000000, 1817 + 0xbefc0084, 0xbf0a7b7c, 1818 + 0xbf84006d, 0xbf11017c, 1819 + 0x807bff7b, 0x00001000, 1820 + 0x867aff78, 0x00400000, 1821 + 0xbf850003, 0xb8faf803, 1822 + 0x897a7aff, 0x10000000, 1823 + 0xbf850051, 0xbe840080, 1824 + 0xd2890000, 0x00000900, 1825 + 0x80048104, 0xd2890001, 1826 + 0x00000900, 0x80048104, 1827 + 0xd2890002, 0x00000900, 1828 + 0x80048104, 0xd2890003, 1829 + 0x00000900, 0x80048104, 1830 + 0xc069003a, 0x00000070, 1831 + 0xbf8cc07f, 0x80709070, 1832 + 0xbf06c004, 
0xbf84ffee, 1833 + 0xbe840080, 0xd2890000, 1834 + 0x00000901, 0x80048104, 1835 + 0xd2890001, 0x00000901, 1836 + 0x80048104, 0xd2890002, 1837 + 0x00000901, 0x80048104, 1838 + 0xd2890003, 0x00000901, 1839 + 0x80048104, 0xc069003a, 1840 + 0x00000070, 0xbf8cc07f, 1841 + 0x80709070, 0xbf06c004, 1842 + 0xbf84ffee, 0xbe840080, 1843 + 0xd2890000, 0x00000902, 1844 + 0x80048104, 0xd2890001, 1845 + 0x00000902, 0x80048104, 1846 + 0xd2890002, 0x00000902, 1847 + 0x80048104, 0xd2890003, 1848 + 0x00000902, 0x80048104, 1849 + 0xc069003a, 0x00000070, 1850 + 0xbf8cc07f, 0x80709070, 1851 + 0xbf06c004, 0xbf84ffee, 1852 + 0xbe840080, 0xd2890000, 1853 + 0x00000903, 0x80048104, 1854 + 0xd2890001, 0x00000903, 1855 + 0x80048104, 0xd2890002, 1856 + 0x00000903, 0x80048104, 1857 + 0xd2890003, 0x00000903, 1858 + 0x80048104, 0xc069003a, 1859 + 0x00000070, 0xbf8cc07f, 1860 + 0x80709070, 0xbf06c004, 1861 + 0xbf84ffee, 0x807c847c, 1862 + 0xbf0a7b7c, 0xbf85ffb1, 1863 + 0xbf9c0000, 0xbf820012, 1864 + 0x7e000300, 0x7e020301, 1865 + 0x7e040302, 0x7e060303, 1866 + 0xe0724000, 0x701d0000, 1867 + 0xe0724100, 0x701d0100, 1868 + 0xe0724200, 0x701d0200, 1869 + 0xe0724300, 0x701d0300, 1870 + 0x807c847c, 0x8070ff70, 1871 + 0x00000400, 0xbf0a7b7c, 1872 + 0xbf85ffef, 0xbf9c0000, 1873 + 0xb8fb2985, 0x807b817b, 1874 + 0x8e7b837b, 0xb8fa2b05, 1875 + 0x807a817a, 0x8e7a827a, 1876 + 0x80fb7a7b, 0xbf84007a, 1877 + 0x807bff7b, 0x00001000, 1878 + 0xbefc0080, 0xbf11017c, 1879 + 0x867aff78, 0x00400000, 1880 + 0xbf850003, 0xb8faf803, 1881 + 0x897a7aff, 0x10000000, 1882 + 0xbf850059, 0xd3d84000, 1883 + 0x18000100, 0xd3d84001, 1884 + 0x18000101, 0xd3d84002, 1885 + 0x18000102, 0xd3d84003, 1886 + 0x18000103, 0xbe840080, 1887 + 0xd2890000, 0x00000900, 1888 + 0x80048104, 0xd2890001, 1889 + 0x00000900, 0x80048104, 1890 + 0xd2890002, 0x00000900, 1891 + 0x80048104, 0xd2890003, 1892 + 0x00000900, 0x80048104, 1893 + 0xc069003a, 0x00000070, 1894 + 0xbf8cc07f, 0x80709070, 1895 + 0xbf06c004, 0xbf84ffee, 1896 + 0xbe840080, 0xd2890000, 
1897 + 0x00000901, 0x80048104, 1898 + 0xd2890001, 0x00000901, 1899 + 0x80048104, 0xd2890002, 1900 + 0x00000901, 0x80048104, 1901 + 0xd2890003, 0x00000901, 1902 + 0x80048104, 0xc069003a, 1903 + 0x00000070, 0xbf8cc07f, 1904 + 0x80709070, 0xbf06c004, 1905 + 0xbf84ffee, 0xbe840080, 1906 + 0xd2890000, 0x00000902, 1907 + 0x80048104, 0xd2890001, 1908 + 0x00000902, 0x80048104, 1909 + 0xd2890002, 0x00000902, 1910 + 0x80048104, 0xd2890003, 1911 + 0x00000902, 0x80048104, 1912 + 0xc069003a, 0x00000070, 1913 + 0xbf8cc07f, 0x80709070, 1914 + 0xbf06c004, 0xbf84ffee, 1915 + 0xbe840080, 0xd2890000, 1916 + 0x00000903, 0x80048104, 1917 + 0xd2890001, 0x00000903, 1918 + 0x80048104, 0xd2890002, 1919 + 0x00000903, 0x80048104, 1920 + 0xd2890003, 0x00000903, 1921 + 0x80048104, 0xc069003a, 1922 + 0x00000070, 0xbf8cc07f, 1923 + 0x80709070, 0xbf06c004, 1924 + 0xbf84ffee, 0x807c847c, 1925 + 0xbf0a7b7c, 0xbf85ffa9, 1926 + 0xbf9c0000, 0xbf820016, 1927 + 0xd3d84000, 0x18000100, 1928 + 0xd3d84001, 0x18000101, 1929 + 0xd3d84002, 0x18000102, 1930 + 0xd3d84003, 0x18000103, 1931 + 0xe0724000, 0x701d0000, 1932 + 0xe0724100, 0x701d0100, 1933 + 0xe0724200, 0x701d0200, 1934 + 0xe0724300, 0x701d0300, 1935 + 0x807c847c, 0x8070ff70, 1936 + 0x00000400, 0xbf0a7b7c, 1937 + 0xbf85ffeb, 0xbf9c0000, 1938 + 0xbf820100, 0xbef4007e, 1939 + 0x8675ff7f, 0x0000ffff, 1940 + 0x8775ff75, 0x00040000, 1941 + 0xbef60080, 0xbef700ff, 1942 + 0x00807fac, 0x866eff7f, 1943 + 0x08000000, 0x8f6e836e, 1944 + 0x87776e77, 0x866eff7f, 1945 + 0x70000000, 0x8f6e816e, 1946 + 0x87776e77, 0x866eff7f, 1947 + 0x04000000, 0xbf84001f, 1948 + 0xbefe00c1, 0xbeff00c1, 1949 + 0xb8ef4306, 0x866fc16f, 1950 + 0xbf84001a, 0x8e6f866f, 1951 + 0x8e6f826f, 0xbef6006f, 1952 + 0xb8f82985, 0x80788178, 1953 + 0x8e788a78, 0x8e788178, 1954 + 0xb8ee1605, 0x806e816e, 1955 + 0x8e6e866e, 0x80786e78, 1956 + 0x8078ff78, 0x00000080, 1957 + 0xbef600ff, 0x01000000, 1958 + 0xbefc0080, 0xe0510000, 1959 + 0x781d0000, 0xe0510100, 1960 + 0x781d0000, 0x807cff7c, 1961 + 
0x00000200, 0x8078ff78, 1962 + 0x00000200, 0xbf0a6f7c, 1963 + 0xbf85fff6, 0xbefe00c1, 1964 + 0xbeff00c1, 0xbef600ff, 1965 + 0x01000000, 0xb8ef2b05, 1966 + 0x806f816f, 0x8e6f826f, 1967 + 0x806fff6f, 0x00008000, 1968 + 0xbef80080, 0xbeee0078, 1969 + 0x8078ff78, 0x00000400, 1970 + 0xbefc0084, 0xbf11087c, 1971 + 0xe0524000, 0x781d0000, 1972 + 0xe0524100, 0x781d0100, 1973 + 0xe0524200, 0x781d0200, 1974 + 0xe0524300, 0x781d0300, 1975 + 0xbf8c0f70, 0x7e000300, 1976 + 0x7e020301, 0x7e040302, 1977 + 0x7e060303, 0x807c847c, 1978 + 0x8078ff78, 0x00000400, 1979 + 0xbf0a6f7c, 0xbf85ffee, 1980 + 0xb8ef2985, 0x806f816f, 1981 + 0x8e6f836f, 0xb8f92b05, 1982 + 0x80798179, 0x8e798279, 1983 + 0x80ef796f, 0xbf84001a, 1984 + 0x806fff6f, 0x00008000, 1985 + 0xbefc0080, 0xbf11087c, 1986 + 0xe0524000, 0x781d0000, 1987 + 0xe0524100, 0x781d0100, 1988 + 0xe0524200, 0x781d0200, 1989 + 0xe0524300, 0x781d0300, 1990 + 0xbf8c0f70, 0xd3d94000, 1991 + 0x18000100, 0xd3d94001, 1992 + 0x18000101, 0xd3d94002, 1993 + 0x18000102, 0xd3d94003, 1994 + 0x18000103, 0x807c847c, 1995 + 0x8078ff78, 0x00000400, 1996 + 0xbf0a6f7c, 0xbf85ffea, 1997 + 0xbf9c0000, 0xe0524000, 1998 + 0x6e1d0000, 0xe0524100, 1999 + 0x6e1d0100, 0xe0524200, 2000 + 0x6e1d0200, 0xe0524300, 2001 + 0x6e1d0300, 0xbf8c0f70, 2002 + 0xb8f82985, 0x80788178, 2003 + 0x8e788a78, 0x8e788178, 2004 + 0xb8ee1605, 0x806e816e, 2005 + 0x8e6e866e, 0x80786e78, 2006 + 0x80f8c078, 0xb8ef1605, 2007 + 0x806f816f, 0x8e6f846f, 2008 + 0x8e76826f, 0xbef600ff, 2009 + 0x01000000, 0xbefc006f, 2010 + 0xc031003a, 0x00000078, 2011 + 0x80f8c078, 0xbf8cc07f, 2012 + 0x80fc907c, 0xbf800000, 2013 + 0xbe802d00, 0xbe822d02, 2014 + 0xbe842d04, 0xbe862d06, 2015 + 0xbe882d08, 0xbe8a2d0a, 2016 + 0xbe8c2d0c, 0xbe8e2d0e, 2017 + 0xbf06807c, 0xbf84fff0, 2018 + 0xb8f82985, 0x80788178, 2019 + 0x8e788a78, 0x8e788178, 2020 + 0xb8ee1605, 0x806e816e, 2021 + 0x8e6e866e, 0x80786e78, 2022 + 0xbef60084, 0xbef600ff, 2023 + 0x01000000, 0xc0211bfa, 2024 + 0x00000078, 0x80788478, 2025 + 0xc0211b3a, 
0x00000078, 2026 + 0x80788478, 0xc0211b7a, 2027 + 0x00000078, 0x80788478, 2028 + 0xc0211c3a, 0x00000078, 2029 + 0x80788478, 0xc0211c7a, 2030 + 0x00000078, 0x80788478, 2031 + 0xc0211eba, 0x00000078, 2032 + 0x80788478, 0xc0211efa, 2033 + 0x00000078, 0x80788478, 2034 + 0xc0211a3a, 0x00000078, 2035 + 0x80788478, 0xc0211a7a, 2036 + 0x00000078, 0x80788478, 2037 + 0xc0211cfa, 0x00000078, 2038 + 0x80788478, 0xbf8cc07f, 2039 + 0xbefc006f, 0xbefe0070, 2040 + 0xbeff0071, 0x866f7bff, 2041 + 0x000003ff, 0xb96f4803, 2042 + 0x866f7bff, 0xfffff800, 2043 + 0x8f6f8b6f, 0xb96fa2c3, 2044 + 0xb973f801, 0xb8ee2985, 2045 + 0x806e816e, 0x8e6e8a6e, 2046 + 0x8e6e816e, 0xb8ef1605, 2047 + 0x806f816f, 0x8e6f866f, 2048 + 0x806e6f6e, 0x806e746e, 2049 + 0x826f8075, 0x866fff6f, 2050 + 0x0000ffff, 0xc00b1c37, 2051 + 0x00000050, 0xc00b1d37, 2052 + 0x00000060, 0xc0031e77, 2053 + 0x00000074, 0xbf8cc07f, 2054 + 0x866fff6d, 0xf8000000, 2055 + 0x8f6f9b6f, 0x8e6f906f, 2056 + 0xbeee0080, 0x876e6f6e, 2057 + 0x866fff6d, 0x04000000, 2058 + 0x8f6f9a6f, 0x8e6f8f6f, 2059 + 0x876e6f6e, 0x866fff7a, 2060 + 0x00800000, 0x8f6f976f, 2061 + 0xb96ef807, 0x866dff6d, 2062 + 0x0000ffff, 0x86fe7e7e, 2063 + 0x86ea6a6a, 0x8f6e837a, 2064 + 0xb96ee0c2, 0xbf800002, 2065 + 0xb97a0002, 0xbf8a0000, 2066 + 0x95806f6c, 0xbf810000, 2067 + }; 2068 + 1578 2069 static const uint32_t cwsr_trap_gfx10_hex[] = { 1579 2070 0xbf820001, 0xbf8201cf, 1580 2071 0xb0804004, 0xb978f802,
+1
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
··· 563 563 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 564 564 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 565 565 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 566 + s_waitcnt vmcnt(0) 566 567 567 568 /* restore SGPRs */ 568 569 //////////////////////////////
+124 -84
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
··· 21 21 */ 22 22 23 23 /* To compile this assembly code: 24 - * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex 24 + * 25 + * gfx9: 26 + * cpp -DASIC_FAMILY=CHIP_VEGAM cwsr_trap_handler_gfx9.asm -P -o gfx9.sp3 27 + * sp3 gfx9.sp3 -hex gfx9.hex 28 + * 29 + * arcturus: 30 + * cpp -DASIC_FAMILY=CHIP_ARCTURUS cwsr_trap_handler_gfx9.asm -P -o arcturus.sp3 31 + * sp3 arcturus.sp3 -hex arcturus.hex 32 + * 33 + * aldebaran: 34 + * cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3 35 + * sp3 aldebaran.sp3 -hex aldebaran.hex 25 36 */ 37 + 38 + #define CHIP_VEGAM 18 39 + #define CHIP_ARCTURUS 23 40 + #define CHIP_ALDEBARAN 25 26 41 27 42 var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency 28 43 var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger ··· 59 44 60 45 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 61 46 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 62 - var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 63 47 var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 64 - var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 65 48 var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits 49 + var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 50 + 51 + #if ASIC_FAMILY >= CHIP_ALDEBARAN 52 + var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 6 53 + var SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SHIFT = 12 54 + var SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SIZE = 6 55 + #else 56 + var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 57 + #endif 66 58 67 59 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 68 60 var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask ··· 156 134 var s_restore_spi_init_hi = exec_hi 157 135 158 136 var s_restore_mem_offset = ttmp12 159 - var s_restore_accvgpr_offset = ttmp13 137 + var s_restore_tmp2 = ttmp13 160 138 var s_restore_alloc_size = ttmp3 161 139 var s_restore_tmp = ttmp2 162 140 var s_restore_mem_offset_save 
= s_restore_tmp //no conflict ··· 488 466 L_SAVE_FIRST_VGPRS_WITH_TCP: 489 467 end 490 468 491 - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 492 - buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 493 - buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 494 - buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 495 - 496 - 469 + write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset) 497 470 498 471 /* save LDS */ 499 472 ////////////////////////////// ··· 582 565 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 583 566 s_mov_b32 exec_hi, 0xFFFFFFFF 584 567 585 - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size 586 - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 587 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible 588 - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) 589 - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 568 + get_num_arch_vgprs(s_save_alloc_size) 569 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 590 570 591 571 592 572 // VGPR store using dw burst ··· 616 602 v_mov_b32 v2, v2 //v0 = v[0+m0] 617 603 v_mov_b32 v3, v3 //v0 = v[0+m0] 618 604 619 - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 620 - buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 621 - buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 622 - buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 605 + write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset) 623 606 624 607 s_add_u32 m0, m0, 4 //next vgpr index 625 608 s_add_u32 s_save_mem_offset, 
s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes ··· 626 615 627 616 L_SAVE_VGPR_END: 628 617 629 - if ASIC_TARGET_ARCTURUS 618 + #if ASIC_FAMILY >= CHIP_ARCTURUS 630 619 // Save ACC VGPRs 620 + 621 + #if ASIC_FAMILY >= CHIP_ALDEBARAN 622 + // ACC VGPR count may differ from ARCH VGPR count. 623 + get_num_acc_vgprs(s_save_alloc_size, s_save_tmp) 624 + s_cbranch_scc0 L_SAVE_ACCVGPR_END 625 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later 626 + #endif 627 + 631 628 s_mov_b32 m0, 0x0 //VGPR initial index value =0 632 629 s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 633 630 ··· 663 644 v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0] 664 645 end 665 646 666 - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 667 - buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 668 - buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 669 - buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 647 + write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset) 670 648 671 649 s_add_u32 m0, m0, 4 672 650 s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 ··· 672 656 s_set_gpr_idx_off 673 657 674 658 L_SAVE_ACCVGPR_END: 675 - end 659 + #endif 676 660 677 661 s_branch L_END_PGM 678 662 ··· 740 724 /* restore VGPRs */ 741 725 ////////////////////////////// 742 726 L_RESTORE_VGPR: 743 - // VGPR SR memory offset : 0 744 - s_mov_b32 s_restore_mem_offset, 0x0 745 727 s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead 746 728 s_mov_b32 exec_hi, 0xFFFFFFFF 729 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 747 730 748 - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size 749 - s_add_u32 
s_restore_alloc_size, s_restore_alloc_size, 1 750 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) 751 - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) 752 - 753 - if ASIC_TARGET_ARCTURUS 754 - s_mov_b32 s_restore_accvgpr_offset, s_restore_buf_rsrc2 //ACC VGPRs at end of VGPRs 755 - end 756 - 757 - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 758 - 759 - // VGPR load using dw burst 760 - s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last 761 - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 762 - if ASIC_TARGET_ARCTURUS 763 - s_mov_b32 s_restore_accvgpr_offset_save, s_restore_accvgpr_offset 764 - s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4 765 - end 766 - s_mov_b32 m0, 4 //VGPR initial index value = 1 767 - s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 731 + // Restore ARCH VGPRs 4-N, then all ACC VGPRs, then ARCH VGPRs 0-3. 
732 + get_num_arch_vgprs(s_restore_alloc_size) 768 733 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later 769 734 735 + // ARCH VGPRs at offset: 0 736 + s_mov_b32 s_restore_mem_offset, 0x0 737 + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last 738 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 739 + s_mov_b32 m0, 4 //VGPR initial index value = 1 740 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 741 + 770 742 L_RESTORE_VGPR_LOOP: 771 - 772 - if ASIC_TARGET_ARCTURUS 773 - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 774 - buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256 775 - buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*2 776 - buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*3 777 - s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4 778 - s_waitcnt vmcnt(0) 779 - 780 - for var vgpr = 0; vgpr < 4; ++ vgpr 781 - v_accvgpr_write acc[vgpr], v[vgpr] 782 - end 783 - end 784 - 785 - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 786 - buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 787 - buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 788 - buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 789 - s_waitcnt vmcnt(0) //ensure data ready 743 + read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset) 790 744 v_mov_b32 v0, v0 //v[0+m0] = v0 791 745 v_mov_b32 v1, v1 792 746 v_mov_b32 v2, v2 ··· 765 779 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes 766 780 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < 
s_restore_alloc_size) ? 1 : 0 767 781 s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? 782 + 783 + #if ASIC_FAMILY >= CHIP_ALDEBARAN 784 + // ACC VGPR count may differ from ARCH VGPR count. 785 + get_num_acc_vgprs(s_restore_alloc_size, s_restore_tmp2) 786 + s_cbranch_scc0 L_RESTORE_ACCVGPR_END 787 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later 788 + #endif 789 + 790 + #if ASIC_FAMILY >= CHIP_ARCTURUS 791 + // ACC VGPRs at offset: size(ARCH VGPRs) 792 + s_mov_b32 m0, 0 793 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 794 + 795 + L_RESTORE_ACCVGPR_LOOP: 796 + read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset) 797 + 798 + for var vgpr = 0; vgpr < 4; ++ vgpr 799 + v_accvgpr_write acc[vgpr], v[vgpr] 800 + end 801 + 802 + s_add_u32 m0, m0, 4 //next vgpr index 803 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes 804 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 805 + s_cbranch_scc1 L_RESTORE_ACCVGPR_LOOP //VGPR restore (except v0) is complete? 
806 + L_RESTORE_ACCVGPR_END: 807 + #endif 808 + 768 809 s_set_gpr_idx_off 769 - /* VGPR restore on v0 */ 770 - if ASIC_TARGET_ARCTURUS 771 - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 772 - buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256 773 - buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*2 774 - buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*3 775 - s_waitcnt vmcnt(0) 776 810 777 - for var vgpr = 0; vgpr < 4; ++ vgpr 778 - v_accvgpr_write acc[vgpr], v[vgpr] 779 - end 780 - end 781 - 782 - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 783 - buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 784 - buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 785 - buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 811 + // Restore VGPRs 0-3 last, now that they are no longer needed as staging registers. 
812 + read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset_save) 786 813 787 814 /* restore SGPRs */ 788 815 ////////////////////////////// ··· 973 974 L_TCP_STORE_CHECK_DONE: 974 975 end 975 976 977 + function write_4vgprs_to_mem(s_rsrc, s_mem_offset) 978 + buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 979 + buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 980 + buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 981 + buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 982 + end 983 + 984 + function read_4vgprs_from_mem(s_rsrc, s_mem_offset) 985 + buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 986 + buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 987 + buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 988 + buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 989 + s_waitcnt vmcnt(0) 990 + end 991 + 976 992 function write_vgpr_to_mem_with_sqc(v, s_rsrc, s_mem_offset) 977 993 s_mov_b32 s4, 0 978 994 ··· 1022 1008 s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 1023 1009 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible 1024 1010 1025 - if ASIC_TARGET_ARCTURUS 1011 + #if ASIC_FAMILY >= CHIP_ARCTURUS 1026 1012 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, 1 // Double size for ACC VGPRs 1027 - end 1013 + #endif 1028 1014 end 1029 1015 1030 1016 function get_sgpr_size_bytes(s_sgpr_size_byte) ··· 1036 1022 function get_hwreg_size_bytes 1037 1023 return 128 //HWREG size 128 bytes 1038 1024 end 1025 + 1026 + function get_num_arch_vgprs(s_num_arch_vgprs) 1027 + #if ASIC_FAMILY >= CHIP_ALDEBARAN 1028 + // VGPR count includes ACC VGPRs, use ACC VGPR offset for ARCH VGPR count. 
1029 + s_getreg_b32 s_num_arch_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SHIFT,SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SIZE) 1030 + #else 1031 + s_getreg_b32 s_num_arch_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) 1032 + #endif 1033 + 1034 + // Number of VGPRs = (vgpr_size + 1) * 4 1035 + s_add_u32 s_num_arch_vgprs, s_num_arch_vgprs, 1 1036 + s_lshl_b32 s_num_arch_vgprs, s_num_arch_vgprs, 2 1037 + end 1038 + 1039 + #if ASIC_FAMILY >= CHIP_ALDEBARAN 1040 + function get_num_acc_vgprs(s_num_acc_vgprs, s_tmp) 1041 + // VGPR count = (GPR_ALLOC.VGPR_SIZE + 1) * 8 1042 + s_getreg_b32 s_num_acc_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) 1043 + s_add_u32 s_num_acc_vgprs, s_num_acc_vgprs, 1 1044 + s_lshl_b32 s_num_acc_vgprs, s_num_acc_vgprs, 3 1045 + 1046 + // ACC VGPR count = VGPR count - ARCH VGPR count. 1047 + get_num_arch_vgprs(s_tmp) 1048 + s_sub_u32 s_num_acc_vgprs, s_num_acc_vgprs, s_tmp 1049 + end 1050 + #endif 1039 1051 1040 1052 function ack_sqc_store_workaround 1041 1053 if ACK_SQC_STORE
+5 -2
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 656 656 BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); 657 657 kfd->cwsr_isa = cwsr_trap_gfx8_hex; 658 658 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); 659 - } else if (kfd->device_info->asic_family == CHIP_ARCTURUS 660 - || kfd->device_info->asic_family == CHIP_ALDEBARAN) { 659 + } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { 661 660 BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); 662 661 kfd->cwsr_isa = cwsr_trap_arcturus_hex; 663 662 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); 663 + } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { 664 + BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); 665 + kfd->cwsr_isa = cwsr_trap_aldebaran_hex; 666 + kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); 664 667 } else if (kfd->device_info->asic_family < CHIP_NAVI10) { 665 668 BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); 666 669 kfd->cwsr_isa = cwsr_trap_gfx9_hex;