Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon: Add CP init for CIK (v7)

Sets up the GFX ring and loads ucode for GFX and Compute.

Todo:
- handle compute queue setup.

v2: add documentation
v3: integrate with latest reset changes
v4: additional init fixes
v5: scratch reg write back no longer supported on CIK
v6: properly set CP_RB0_BASE_HI
v7: rebase

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+899 -2
+1 -1
drivers/gpu/drm/radeon/Makefile
··· 76 76 evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ 77 77 evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ 78 78 atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ 79 - si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o 79 + si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o 80 80 81 81 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o 82 82 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
+395
drivers/gpu/drm/radeon/cik.c
··· 30 30 #include "radeon_asic.h" 31 31 #include "cikd.h" 32 32 #include "atom.h" 33 + #include "cik_blit_shaders.h" 33 34 34 35 /* GFX */ 35 36 #define CIK_PFP_UCODE_SIZE 2144 ··· 1490 1489 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); 1491 1490 1492 1491 udelay(50); 1492 + } 1493 + 1494 + /* 1495 + * CP. 1496 + * On CIK, gfx and compute now have independant command processors. 1497 + * 1498 + * GFX 1499 + * Gfx consists of a single ring and can process both gfx jobs and 1500 + * compute jobs. The gfx CP consists of three microengines (ME): 1501 + * PFP - Pre-Fetch Parser 1502 + * ME - Micro Engine 1503 + * CE - Constant Engine 1504 + * The PFP and ME make up what is considered the Drawing Engine (DE). 1505 + * The CE is an asynchronous engine used for updating buffer desciptors 1506 + * used by the DE so that they can be loaded into cache in parallel 1507 + * while the DE is processing state update packets. 1508 + * 1509 + * Compute 1510 + * The compute CP consists of two microengines (ME): 1511 + * MEC1 - Compute MicroEngine 1 1512 + * MEC2 - Compute MicroEngine 2 1513 + * Each MEC supports 4 compute pipes and each pipe supports 8 queues. 1514 + * The queues are exposed to userspace and are programmed directly 1515 + * by the compute runtime. 1516 + */ 1517 + /** 1518 + * cik_cp_gfx_enable - enable/disable the gfx CP MEs 1519 + * 1520 + * @rdev: radeon_device pointer 1521 + * @enable: enable or disable the MEs 1522 + * 1523 + * Halts or unhalts the gfx MEs. 1524 + */ 1525 + static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) 1526 + { 1527 + if (enable) 1528 + WREG32(CP_ME_CNTL, 0); 1529 + else { 1530 + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 1531 + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1532 + } 1533 + udelay(50); 1534 + } 1535 + 1536 + /** 1537 + * cik_cp_gfx_load_microcode - load the gfx CP ME ucode 1538 + * 1539 + * @rdev: radeon_device pointer 1540 + * 1541 + * Loads the gfx PFP, ME, and CE ucode. 1542 + * Returns 0 for success, -EINVAL if the ucode is not available. 1543 + */ 1544 + static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) 1545 + { 1546 + const __be32 *fw_data; 1547 + int i; 1548 + 1549 + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) 1550 + return -EINVAL; 1551 + 1552 + cik_cp_gfx_enable(rdev, false); 1553 + 1554 + /* PFP */ 1555 + fw_data = (const __be32 *)rdev->pfp_fw->data; 1556 + WREG32(CP_PFP_UCODE_ADDR, 0); 1557 + for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) 1558 + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 1559 + WREG32(CP_PFP_UCODE_ADDR, 0); 1560 + 1561 + /* CE */ 1562 + fw_data = (const __be32 *)rdev->ce_fw->data; 1563 + WREG32(CP_CE_UCODE_ADDR, 0); 1564 + for (i = 0; i < CIK_CE_UCODE_SIZE; i++) 1565 + WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); 1566 + WREG32(CP_CE_UCODE_ADDR, 0); 1567 + 1568 + /* ME */ 1569 + fw_data = (const __be32 *)rdev->me_fw->data; 1570 + WREG32(CP_ME_RAM_WADDR, 0); 1571 + for (i = 0; i < CIK_ME_UCODE_SIZE; i++) 1572 + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 1573 + WREG32(CP_ME_RAM_WADDR, 0); 1574 + 1575 + WREG32(CP_PFP_UCODE_ADDR, 0); 1576 + WREG32(CP_CE_UCODE_ADDR, 0); 1577 + WREG32(CP_ME_RAM_WADDR, 0); 1578 + WREG32(CP_ME_RAM_RADDR, 0); 1579 + return 0; 1580 + } 1581 + 1582 + /** 1583 + * cik_cp_gfx_start - start the gfx ring 1584 + * 1585 + * @rdev: radeon_device pointer 1586 + * 1587 + * Enables the ring and loads the clear state context and other 1588 + * packets required to init the ring. 1589 + * Returns 0 for success, error for failure. 1590 + */ 1591 + static int cik_cp_gfx_start(struct radeon_device *rdev) 1592 + { 1593 + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1594 + int r, i; 1595 + 1596 + /* init the CP */ 1597 + WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); 1598 + WREG32(CP_ENDIAN_SWAP, 0); 1599 + WREG32(CP_DEVICE_ID, 1); 1600 + 1601 + cik_cp_gfx_enable(rdev, true); 1602 + 1603 + r = radeon_ring_lock(rdev, ring, cik_default_size + 17); 1604 + if (r) { 1605 + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 1606 + return r; 1607 + } 1608 + 1609 + /* init the CE partitions. CE only used for gfx on CIK */ 1610 + radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 1611 + radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 1612 + radeon_ring_write(ring, 0xc000); 1613 + radeon_ring_write(ring, 0xc000); 1614 + 1615 + /* setup clear context state */ 1616 + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1617 + radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1618 + 1619 + radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1620 + radeon_ring_write(ring, 0x80000000); 1621 + radeon_ring_write(ring, 0x80000000); 1622 + 1623 + for (i = 0; i < cik_default_size; i++) 1624 + radeon_ring_write(ring, cik_default_state[i]); 1625 + 1626 + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1627 + radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 1628 + 1629 + /* set clear context state */ 1630 + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 1631 + radeon_ring_write(ring, 0); 1632 + 1633 + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1634 + radeon_ring_write(ring, 0x00000316); 1635 + radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 1636 + radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 1637 + 1638 + radeon_ring_unlock_commit(rdev, ring); 1639 + 1640 + return 0; 1641 + } 1642 + 1643 + /** 1644 + * cik_cp_gfx_fini - stop the gfx ring 1645 + * 1646 + * @rdev: radeon_device pointer 1647 + * 1648 + * Stop the gfx ring and tear down the driver ring 1649 + * info. 1650 + */ 1651 + static void cik_cp_gfx_fini(struct radeon_device *rdev) 1652 + { 1653 + cik_cp_gfx_enable(rdev, false); 1654 + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1655 + } 1656 + 1657 + /** 1658 + * cik_cp_gfx_resume - setup the gfx ring buffer registers 1659 + * 1660 + * @rdev: radeon_device pointer 1661 + * 1662 + * Program the location and size of the gfx ring buffer 1663 + * and test it to make sure it's working. 1664 + * Returns 0 for success, error for failure. 1665 + */ 1666 + static int cik_cp_gfx_resume(struct radeon_device *rdev) 1667 + { 1668 + struct radeon_ring *ring; 1669 + u32 tmp; 1670 + u32 rb_bufsz; 1671 + u64 rb_addr; 1672 + int r; 1673 + 1674 + WREG32(CP_SEM_WAIT_TIMER, 0x0); 1675 + WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); 1676 + 1677 + /* Set the write pointer delay */ 1678 + WREG32(CP_RB_WPTR_DELAY, 0); 1679 + 1680 + /* set the RB to use vmid 0 */ 1681 + WREG32(CP_RB_VMID, 0); 1682 + 1683 + WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); 1684 + 1685 + /* ring 0 - compute and gfx */ 1686 + /* Set ring buffer size */ 1687 + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1688 + rb_bufsz = drm_order(ring->ring_size / 8); 1689 + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; 1690 + #ifdef __BIG_ENDIAN 1691 + tmp |= BUF_SWAP_32BIT; 1692 + #endif 1693 + WREG32(CP_RB0_CNTL, tmp); 1694 + 1695 + /* Initialize the ring buffer's read and write pointers */ 1696 + WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); 1697 + ring->wptr = 0; 1698 + WREG32(CP_RB0_WPTR, ring->wptr); 1699 + 1700 + /* set the wb address wether it's enabled or not */ 1701 + WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); 1702 + WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); 1703 + 1704 + /* scratch register shadowing is no longer supported */ 1705 + WREG32(SCRATCH_UMSK, 0); 1706 + 1707 + if (!rdev->wb.enabled) 1708 + tmp |= RB_NO_UPDATE; 1709 + 1710 + mdelay(1); 1711 + WREG32(CP_RB0_CNTL, tmp); 1712 + 1713 + rb_addr = ring->gpu_addr >> 8; 1714 + WREG32(CP_RB0_BASE, rb_addr); 1715 + WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); 1716 + 1717 + ring->rptr = RREG32(CP_RB0_RPTR); 1718 + 1719 + /* start the ring */ 1720 + cik_cp_gfx_start(rdev); 1721 + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; 1722 + r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1723 + if (r) { 1724 + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1725 + return r; 1726 + } 1727 + return 0; 1728 + } 1729 + 1730 + /** 1731 + * cik_cp_compute_enable - enable/disable the compute CP MEs 1732 + * 1733 + * @rdev: radeon_device pointer 1734 + * @enable: enable or disable the MEs 1735 + * 1736 + * Halts or unhalts the compute MEs. 1737 + */ 1738 + static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 1739 + { 1740 + if (enable) 1741 + WREG32(CP_MEC_CNTL, 0); 1742 + else 1743 + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 1744 + udelay(50); 1745 + } 1746 + 1747 + /** 1748 + * cik_cp_compute_load_microcode - load the compute CP ME ucode 1749 + * 1750 + * @rdev: radeon_device pointer 1751 + * 1752 + * Loads the compute MEC1&2 ucode. 1753 + * Returns 0 for success, -EINVAL if the ucode is not available. 1754 + */ 1755 + static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 1756 + { 1757 + const __be32 *fw_data; 1758 + int i; 1759 + 1760 + if (!rdev->mec_fw) 1761 + return -EINVAL; 1762 + 1763 + cik_cp_compute_enable(rdev, false); 1764 + 1765 + /* MEC1 */ 1766 + fw_data = (const __be32 *)rdev->mec_fw->data; 1767 + WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 1768 + for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 1769 + WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 1770 + WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 1771 + 1772 + if (rdev->family == CHIP_KAVERI) { 1773 + /* MEC2 */ 1774 + fw_data = (const __be32 *)rdev->mec_fw->data; 1775 + WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 1776 + for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 1777 + WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 1778 + WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 1779 + } 1780 + 1781 + return 0; 1782 + } 1783 + 1784 + /** 1785 + * cik_cp_compute_start - start the compute queues 1786 + * 1787 + * @rdev: radeon_device pointer 1788 + * 1789 + * Enable the compute queues. 1790 + * Returns 0 for success, error for failure. 1791 + */ 1792 + static int cik_cp_compute_start(struct radeon_device *rdev) 1793 + { 1794 + //todo 1795 + return 0; 1796 + } 1797 + 1798 + /** 1799 + * cik_cp_compute_fini - stop the compute queues 1800 + * 1801 + * @rdev: radeon_device pointer 1802 + * 1803 + * Stop the compute queues and tear down the driver queue 1804 + * info. 1805 + */ 1806 + static void cik_cp_compute_fini(struct radeon_device *rdev) 1807 + { 1808 + cik_cp_compute_enable(rdev, false); 1809 + //todo 1810 + } 1811 + 1812 + /** 1813 + * cik_cp_compute_resume - setup the compute queue registers 1814 + * 1815 + * @rdev: radeon_device pointer 1816 + * 1817 + * Program the compute queues and test them to make sure they 1818 + * are working. 1819 + * Returns 0 for success, error for failure. 1820 + */ 1821 + static int cik_cp_compute_resume(struct radeon_device *rdev) 1822 + { 1823 + int r; 1824 + 1825 + //todo 1826 + r = cik_cp_compute_start(rdev); 1827 + if (r) 1828 + return r; 1829 + return 0; 1830 + } 1831 + 1832 + /* XXX temporary wrappers to handle both compute and gfx */ 1833 + /* XXX */ 1834 + static void cik_cp_enable(struct radeon_device *rdev, bool enable) 1835 + { 1836 + cik_cp_gfx_enable(rdev, enable); 1837 + cik_cp_compute_enable(rdev, enable); 1838 + } 1839 + 1840 + /* XXX */ 1841 + static int cik_cp_load_microcode(struct radeon_device *rdev) 1842 + { 1843 + int r; 1844 + 1845 + r = cik_cp_gfx_load_microcode(rdev); 1846 + if (r) 1847 + return r; 1848 + r = cik_cp_compute_load_microcode(rdev); 1849 + if (r) 1850 + return r; 1851 + 1852 + return 0; 1853 + } 1854 + 1855 + /* XXX */ 1856 + static void cik_cp_fini(struct radeon_device *rdev) 1857 + { 1858 + cik_cp_gfx_fini(rdev); 1859 + cik_cp_compute_fini(rdev); 1860 + } 1861 + 1862 + /* XXX */ 1863 + static int cik_cp_resume(struct radeon_device *rdev) 1864 + { 1865 + int r; 1866 + 1867 + /* Reset all cp blocks */ 1868 + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); 1869 + RREG32(GRBM_SOFT_RESET); 1870 + mdelay(15); 1871 + WREG32(GRBM_SOFT_RESET, 0); 1872 + RREG32(GRBM_SOFT_RESET); 1873 + 1874 + r = cik_cp_load_microcode(rdev); 1875 + if (r) 1876 + return r; 1877 + 1878 + r = cik_cp_gfx_resume(rdev); 1879 + if (r) 1880 + return r; 1881 + r = cik_cp_compute_resume(rdev); 1882 + if (r) 1883 + return r; 1884 + 1885 + return 0; 1493 1886 } 1494 1887 1495 1888 /**
+246
drivers/gpu/drm/radeon/cik_blit_shaders.c
··· 1 + /* 2 + * Copyright 2012 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 + * DEALINGS IN THE SOFTWARE. 22 + * 23 + * Authors: 24 + * Alex Deucher <alexander.deucher@amd.com> 25 + */ 26 + 27 + #include <linux/types.h> 28 + #include <linux/bug.h> 29 + #include <linux/kernel.h> 30 + 31 + const u32 cik_default_state[] = 32 + { 33 + 0xc0066900, 34 + 0x00000000, 35 + 0x00000060, /* DB_RENDER_CONTROL */ 36 + 0x00000000, /* DB_COUNT_CONTROL */ 37 + 0x00000000, /* DB_DEPTH_VIEW */ 38 + 0x0000002a, /* DB_RENDER_OVERRIDE */ 39 + 0x00000000, /* DB_RENDER_OVERRIDE2 */ 40 + 0x00000000, /* DB_HTILE_DATA_BASE */ 41 + 42 + 0xc0046900, 43 + 0x00000008, 44 + 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ 45 + 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ 46 + 0x00000000, /* DB_STENCIL_CLEAR */ 47 + 0x00000000, /* DB_DEPTH_CLEAR */ 48 + 49 + 0xc0036900, 50 + 0x0000000f, 51 + 0x00000000, /* DB_DEPTH_INFO */ 52 + 0x00000000, /* DB_Z_INFO */ 53 + 0x00000000, /* DB_STENCIL_INFO */ 54 + 55 + 0xc0016900, 56 + 0x00000080, 57 + 0x00000000, /* PA_SC_WINDOW_OFFSET */ 58 + 59 + 0xc00d6900, 60 + 0x00000083, 61 + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ 62 + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ 63 + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ 64 + 0x00000000, 65 + 0x20002000, 66 + 0x00000000, 67 + 0x20002000, 68 + 0x00000000, 69 + 0x20002000, 70 + 0xaaaaaaaa, /* PA_SC_EDGERULE */ 71 + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ 72 + 0x0000000f, /* CB_TARGET_MASK */ 73 + 0x0000000f, /* CB_SHADER_MASK */ 74 + 75 + 0xc0226900, 76 + 0x00000094, 77 + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ 78 + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ 79 + 0x80000000, 80 + 0x20002000, 81 + 0x80000000, 82 + 0x20002000, 83 + 0x80000000, 84 + 0x20002000, 85 + 0x80000000, 86 + 0x20002000, 87 + 0x80000000, 88 + 0x20002000, 89 + 0x80000000, 90 + 0x20002000, 91 + 0x80000000, 92 + 0x20002000, 93 + 0x80000000, 94 + 0x20002000, 95 + 0x80000000, 96 + 0x20002000, 97 + 0x80000000, 98 + 0x20002000, 99 + 0x80000000, 100 + 0x20002000, 101 + 0x80000000, 102 + 0x20002000, 103 + 0x80000000, 104 + 0x20002000, 105 + 0x80000000, 106 + 0x20002000, 107 + 0x80000000, 108 + 0x20002000, 109 + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ 110 + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ 111 + 112 + 0xc0046900, 113 + 0x00000100, 114 + 0xffffffff, /* VGT_MAX_VTX_INDX */ 115 + 0x00000000, /* VGT_MIN_VTX_INDX */ 116 + 0x00000000, /* VGT_INDX_OFFSET */ 117 + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ 118 + 119 + 0xc0046900, 120 + 0x00000105, 121 + 0x00000000, /* CB_BLEND_RED */ 122 + 0x00000000, /* CB_BLEND_GREEN */ 123 + 0x00000000, /* CB_BLEND_BLUE */ 124 + 0x00000000, /* CB_BLEND_ALPHA */ 125 + 126 + 0xc0016900, 127 + 0x000001e0, 128 + 0x00000000, /* CB_BLEND0_CONTROL */ 129 + 130 + 0xc00c6900, 131 + 0x00000200, 132 + 0x00000000, /* DB_DEPTH_CONTROL */ 133 + 0x00000000, /* DB_EQAA */ 134 + 0x00cc0010, /* CB_COLOR_CONTROL */ 135 + 0x00000210, /* DB_SHADER_CONTROL */ 136 + 0x00010000, /* PA_CL_CLIP_CNTL */ 137 + 0x00000004, /* PA_SU_SC_MODE_CNTL */ 138 + 0x00000100, /* PA_CL_VTE_CNTL */ 139 + 0x00000000, /* PA_CL_VS_OUT_CNTL */ 140 + 0x00000000, /* PA_CL_NANINF_CNTL */ 141 + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ 142 + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ 143 + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ 144 + 145 + 0xc0116900, 146 + 0x00000280, 147 + 0x00000000, /* PA_SU_POINT_SIZE */ 148 + 0x00000000, /* PA_SU_POINT_MINMAX */ 149 + 0x00000008, /* PA_SU_LINE_CNTL */ 150 + 0x00000000, /* PA_SC_LINE_STIPPLE */ 151 + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ 152 + 0x00000000, /* VGT_HOS_CNTL */ 153 + 0x00000000, 154 + 0x00000000, 155 + 0x00000000, 156 + 0x00000000, 157 + 0x00000000, 158 + 0x00000000, 159 + 0x00000000, 160 + 0x00000000, 161 + 0x00000000, 162 + 0x00000000, 163 + 0x00000000, /* VGT_GS_MODE */ 164 + 165 + 0xc0026900, 166 + 0x00000292, 167 + 0x00000000, /* PA_SC_MODE_CNTL_0 */ 168 + 0x00000000, /* PA_SC_MODE_CNTL_1 */ 169 + 170 + 0xc0016900, 171 + 0x000002a1, 172 + 0x00000000, /* VGT_PRIMITIVEID_EN */ 173 + 174 + 0xc0016900, 175 + 0x000002a5, 176 + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ 177 + 178 + 0xc0026900, 179 + 0x000002a8, 180 + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ 181 + 0x00000000, 182 + 183 + 0xc0026900, 184 + 0x000002ad, 185 + 0x00000000, /* VGT_REUSE_OFF */ 186 + 0x00000000, 187 + 188 + 0xc0016900, 189 + 0x000002d5, 190 + 0x00000000, /* VGT_SHADER_STAGES_EN */ 191 + 192 + 0xc0016900, 193 + 0x000002dc, 194 + 0x0000aa00, /* DB_ALPHA_TO_MASK */ 195 + 196 + 0xc0066900, 197 + 0x000002de, 198 + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ 199 + 0x00000000, 200 + 0x00000000, 201 + 0x00000000, 202 + 0x00000000, 203 + 0x00000000, 204 + 205 + 0xc0026900, 206 + 0x000002e5, 207 + 0x00000000, /* VGT_STRMOUT_CONFIG */ 208 + 0x00000000, 209 + 210 + 0xc01b6900, 211 + 0x000002f5, 212 + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ 213 + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ 214 + 0x00000000, /* PA_SC_LINE_CNTL */ 215 + 0x00000000, /* PA_SC_AA_CONFIG */ 216 + 0x00000005, /* PA_SU_VTX_CNTL */ 217 + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ 218 + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ 219 + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ 220 + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ 221 + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 222 + 0x00000000, 223 + 0x00000000, 224 + 0x00000000, 225 + 0x00000000, 226 + 0x00000000, 227 + 0x00000000, 228 + 0x00000000, 229 + 0x00000000, 230 + 0x00000000, 231 + 0x00000000, 232 + 0x00000000, 233 + 0x00000000, 234 + 0x00000000, 235 + 0x00000000, 236 + 0x00000000, 237 + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ 238 + 0xffffffff, 239 + 240 + 0xc0026900, 241 + 0x00000316, 242 + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 243 + 0x00000010, /* */ 244 + }; 245 + 246 + const u32 cik_default_size = ARRAY_SIZE(cik_default_state);
+32
drivers/gpu/drm/radeon/cik_blit_shaders.h
··· 1 + /* 2 + * Copyright 2012 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 + * DEALINGS IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #ifndef CIK_BLIT_SHADERS_H 26 + #define CIK_BLIT_SHADERS_H 27 + 28 + extern const u32 cik_default_state[]; 29 + 30 + extern const u32 cik_default_size; 31 + 32 + #endif
+222
drivers/gpu/drm/radeon/cikd.h
··· 263 263 #define MEC_ME2_HALT (1 << 28) 264 264 #define MEC_ME1_HALT (1 << 30) 265 265 266 + #define CP_MEC_CNTL 0x8234 267 + #define MEC_ME2_HALT (1 << 28) 268 + #define MEC_ME1_HALT (1 << 30) 269 + 266 270 #define CP_ME_CNTL 0x86D8 267 271 #define CP_CE_HALT (1 << 24) 268 272 #define CP_PFP_HALT (1 << 26) 269 273 #define CP_ME_HALT (1 << 28) 274 + 275 + #define CP_RB0_RPTR 0x8700 276 + #define CP_RB_WPTR_DELAY 0x8704 270 277 271 278 #define CP_MEQ_THRESHOLDS 0x8764 272 279 #define MEQ1_START(x) ((x) << 0) ··· 452 445 #define TC_CFG_L1_VOLATILE 0xAC88 453 446 #define TC_CFG_L2_VOLATILE 0xAC8C 454 447 448 + #define CP_RB0_BASE 0xC100 449 + #define CP_RB0_CNTL 0xC104 450 + #define RB_BUFSZ(x) ((x) << 0) 451 + #define RB_BLKSZ(x) ((x) << 8) 452 + #define BUF_SWAP_32BIT (2 << 16) 453 + #define RB_NO_UPDATE (1 << 27) 454 + #define RB_RPTR_WR_ENA (1 << 31) 455 + 456 + #define CP_RB0_RPTR_ADDR 0xC10C 457 + #define RB_RPTR_SWAP_32BIT (2 << 0) 458 + #define CP_RB0_RPTR_ADDR_HI 0xC110 459 + #define CP_RB0_WPTR 0xC114 460 + 461 + #define CP_DEVICE_ID 0xC12C 462 + #define CP_ENDIAN_SWAP 0xC140 463 + #define CP_RB_VMID 0xC144 464 + 465 + #define CP_PFP_UCODE_ADDR 0xC150 466 + #define CP_PFP_UCODE_DATA 0xC154 467 + #define CP_ME_RAM_RADDR 0xC158 468 + #define CP_ME_RAM_WADDR 0xC15C 469 + #define CP_ME_RAM_DATA 0xC160 470 + 471 + #define CP_CE_UCODE_ADDR 0xC168 472 + #define CP_CE_UCODE_DATA 0xC16C 473 + #define CP_MEC_ME1_UCODE_ADDR 0xC170 474 + #define CP_MEC_ME1_UCODE_DATA 0xC174 475 + #define CP_MEC_ME2_UCODE_ADDR 0xC178 476 + #define CP_MEC_ME2_UCODE_DATA 0xC17C 477 + 478 + #define CP_MAX_CONTEXT 0xC2B8 479 + 480 + #define CP_RB0_BASE_HI 0xC2C4 481 + 455 482 #define PA_SC_RASTER_CONFIG 0x28350 456 483 # define RASTER_CONFIG_RB_MAP_0 0 457 484 # define RASTER_CONFIG_RB_MAP_1 1 458 485 # define RASTER_CONFIG_RB_MAP_2 2 459 486 # define RASTER_CONFIG_RB_MAP_3 3 487 + 488 + #define SCRATCH_REG0 0x30100 489 + #define SCRATCH_REG1 0x30104 490 + #define SCRATCH_REG2 0x30108 491 + #define SCRATCH_REG3 0x3010C 492 + #define SCRATCH_REG4 0x30110 493 + #define SCRATCH_REG5 0x30114 494 + #define SCRATCH_REG6 0x30118 495 + #define SCRATCH_REG7 0x3011C 496 + 497 + #define SCRATCH_UMSK 0x30140 498 + #define SCRATCH_ADDR 0x30144 499 + 500 + #define CP_SEM_WAIT_TIMER 0x301BC 501 + 502 + #define CP_SEM_INCOMPLETE_TIMER_CNTL 0x301C8 460 503 461 504 #define GRBM_GFX_INDEX 0x30800 462 505 #define INSTANCE_INDEX(x) ((x) << 0) ··· 538 481 #define CGTS_USER_TCC_DISABLE 0x3c010 539 482 #define TCC_DISABLE_MASK 0xFFFF0000 540 483 #define TCC_DISABLE_SHIFT 16 484 + 485 + /* 486 + * PM4 487 + */ 488 + #define PACKET_TYPE0 0 489 + #define PACKET_TYPE1 1 490 + #define PACKET_TYPE2 2 491 + #define PACKET_TYPE3 3 492 + 493 + #define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 494 + #define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 495 + #define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) 496 + #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 497 + #define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 498 + (((reg) >> 2) & 0xFFFF) | \ 499 + ((n) & 0x3FFF) << 16) 500 + #define CP_PACKET2 0x80000000 501 + #define PACKET2_PAD_SHIFT 0 502 + #define PACKET2_PAD_MASK (0x3fffffff << 0) 503 + 504 + #define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 505 + 506 + #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 507 + (((op) & 0xFF) << 8) | \ 508 + ((n) & 0x3FFF) << 16) 509 + 510 + #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) 511 + 512 + /* Packet 3 types */ 513 + #define PACKET3_NOP 0x10 514 + #define PACKET3_SET_BASE 0x11 515 + #define PACKET3_BASE_INDEX(x) ((x) << 0) 516 + #define CE_PARTITION_BASE 3 517 + #define PACKET3_CLEAR_STATE 0x12 518 + #define PACKET3_INDEX_BUFFER_SIZE 0x13 519 + #define PACKET3_DISPATCH_DIRECT 0x15 520 + #define PACKET3_DISPATCH_INDIRECT 0x16 521 + #define PACKET3_ATOMIC_GDS 0x1D 522 + #define PACKET3_ATOMIC_MEM 0x1E 523 + #define PACKET3_OCCLUSION_QUERY 0x1F 524 + #define PACKET3_SET_PREDICATION 0x20 525 + #define PACKET3_REG_RMW 0x21 526 + #define PACKET3_COND_EXEC 0x22 527 + #define PACKET3_PRED_EXEC 0x23 528 + #define PACKET3_DRAW_INDIRECT 0x24 529 + #define PACKET3_DRAW_INDEX_INDIRECT 0x25 530 + #define PACKET3_INDEX_BASE 0x26 531 + #define PACKET3_DRAW_INDEX_2 0x27 532 + #define PACKET3_CONTEXT_CONTROL 0x28 533 + #define PACKET3_INDEX_TYPE 0x2A 534 + #define PACKET3_DRAW_INDIRECT_MULTI 0x2C 535 + #define PACKET3_DRAW_INDEX_AUTO 0x2D 536 + #define PACKET3_NUM_INSTANCES 0x2F 537 + #define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 538 + #define PACKET3_INDIRECT_BUFFER_CONST 0x33 539 + #define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 540 + #define PACKET3_DRAW_INDEX_OFFSET_2 0x35 541 + #define PACKET3_DRAW_PREAMBLE 0x36 542 + #define PACKET3_WRITE_DATA 0x37 543 + #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 544 + #define PACKET3_MEM_SEMAPHORE 0x39 545 + #define PACKET3_COPY_DW 0x3B 546 + #define PACKET3_WAIT_REG_MEM 0x3C 547 + #define PACKET3_INDIRECT_BUFFER 0x3F 548 + #define PACKET3_COPY_DATA 0x40 549 + #define PACKET3_PFP_SYNC_ME 0x42 550 + #define PACKET3_SURFACE_SYNC 0x43 551 + # define PACKET3_DEST_BASE_0_ENA (1 << 0) 552 + # define PACKET3_DEST_BASE_1_ENA (1 << 1) 553 + # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) 554 + # define PACKET3_CB1_DEST_BASE_ENA (1 << 7) 555 + # define PACKET3_CB2_DEST_BASE_ENA (1 << 8) 556 + # define PACKET3_CB3_DEST_BASE_ENA (1 << 9) 557 + # define PACKET3_CB4_DEST_BASE_ENA (1 << 10) 558 + # define PACKET3_CB5_DEST_BASE_ENA (1 << 11) 559 + # define PACKET3_CB6_DEST_BASE_ENA (1 << 12) 560 + # define PACKET3_CB7_DEST_BASE_ENA (1 << 13) 561 + # define PACKET3_DB_DEST_BASE_ENA (1 << 14) 562 + # define PACKET3_TCL1_VOL_ACTION_ENA (1 << 15) 563 + # define PACKET3_TC_VOL_ACTION_ENA (1 << 16) /* L2 */ 564 + # define PACKET3_TC_WB_ACTION_ENA (1 << 18) /* L2 */ 565 + # define PACKET3_DEST_BASE_2_ENA (1 << 19) 566 + # define PACKET3_DEST_BASE_3_ENA (1 << 21) 567 + # define PACKET3_TCL1_ACTION_ENA (1 << 22) 568 + # define PACKET3_TC_ACTION_ENA (1 << 23) /* L2 */ 569 + # define PACKET3_CB_ACTION_ENA (1 << 25) 570 + # define PACKET3_DB_ACTION_ENA (1 << 26) 571 + # define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) 572 + # define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28) 573 + # define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) 574 + #define PACKET3_COND_WRITE 0x45 575 + #define PACKET3_EVENT_WRITE 0x46 576 + #define EVENT_TYPE(x) ((x) << 0) 577 + #define EVENT_INDEX(x) ((x) << 8) 578 + /* 0 - any non-TS event 579 + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* 580 + * 2 - SAMPLE_PIPELINESTAT 581 + * 3 - SAMPLE_STREAMOUTSTAT* 582 + * 4 - *S_PARTIAL_FLUSH 583 + * 5 - EOP events 584 + * 6 - EOS events 585 + */ 586 + #define PACKET3_EVENT_WRITE_EOP 0x47 587 + #define EOP_TCL1_VOL_ACTION_EN (1 << 12) 588 + #define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */ 589 + #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ 590 + #define EOP_TCL1_ACTION_EN (1 << 16) 591 + #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ 592 + #define CACHE_POLICY(x) ((x) << 25) 593 + /* 0 - LRU 594 + * 1 - Stream 595 + * 2 - Bypass 596 + */ 597 + #define TCL2_VOLATILE (1 << 27) 598 + #define DATA_SEL(x) ((x) << 29) 599 + /* 0 - discard 600 + * 1 - send low 32bit data 601 + * 2 - send 64bit data 602 + * 3 - send 64bit GPU counter value 603 + * 4 - send 64bit sys counter value 604 + */ 605 + #define INT_SEL(x) ((x) << 24) 606 + /* 0 - none 607 + * 1 - interrupt only (DATA_SEL = 0) 608 + * 2 - interrupt when data write is confirmed 609 + */ 610 + #define DST_SEL(x) ((x) << 16) 611 + /* 0 - MC 612 + * 1 - TC/L2 613 + */ 614 + #define PACKET3_EVENT_WRITE_EOS 0x48 615 + #define PACKET3_RELEASE_MEM 0x49 616 + #define PACKET3_PREAMBLE_CNTL 0x4A 617 + # define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) 618 + # define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) 619 + #define PACKET3_DMA_DATA 0x50 620 + #define PACKET3_AQUIRE_MEM 0x58 621 + #define PACKET3_REWIND 0x59 622 + #define PACKET3_LOAD_UCONFIG_REG 0x5E 623 + #define PACKET3_LOAD_SH_REG 0x5F 624 + #define PACKET3_LOAD_CONFIG_REG 0x60 625 + #define PACKET3_LOAD_CONTEXT_REG 0x61 626 + #define PACKET3_SET_CONFIG_REG 0x68 627 + #define PACKET3_SET_CONFIG_REG_START 0x00008000 628 + #define PACKET3_SET_CONFIG_REG_END 0x0000b000 629 + #define PACKET3_SET_CONTEXT_REG 0x69 630 + #define PACKET3_SET_CONTEXT_REG_START 0x00028000 631 + #define PACKET3_SET_CONTEXT_REG_END 0x00029000 632 + #define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 633 + #define PACKET3_SET_SH_REG 0x76 634 + #define PACKET3_SET_SH_REG_START 0x0000b000 635 + #define PACKET3_SET_SH_REG_END 0x0000c000 636 + #define PACKET3_SET_SH_REG_OFFSET 0x77 637 + #define PACKET3_SET_QUEUE_REG 0x78 638 + #define PACKET3_SET_UCONFIG_REG 0x79 639 + #define PACKET3_SCRATCH_RAM_WRITE 0x7D 640 + #define PACKET3_SCRATCH_RAM_READ 0x7E 641 + #define PACKET3_LOAD_CONST_RAM 0x80 642 + #define PACKET3_WRITE_CONST_RAM 0x81 643 + #define PACKET3_DUMP_CONST_RAM 0x83 644 + #define PACKET3_INCREMENT_CE_COUNTER 0x84 645 + #define PACKET3_INCREMENT_DE_COUNTER 0x85 646 + #define PACKET3_WAIT_ON_CE_COUNTER 0x86 647 + #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 648 + 541 649 542 650 #endif
+3 -1
drivers/gpu/drm/radeon/radeon_cs.c
··· 121 121 p->ring = RADEON_RING_TYPE_GFX_INDEX; 122 122 break; 123 123 case RADEON_CS_RING_COMPUTE: 124 - if (p->rdev->family >= CHIP_TAHITI) { 124 + if (p->rdev->family >= CHIP_BONAIRE) 125 + p->ring = RADEON_RING_TYPE_GFX_INDEX; 126 + else if (p->rdev->family >= CHIP_TAHITI) { 125 127 if (p->priority > 0) 126 128 p->ring = CAYMAN_RING_TYPE_CP1_INDEX; 127 129 else