Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm/tegra/for-5.15-rc1' of ssh://git.freedesktop.org/git/tegra/linux into drm-next

drm/tegra: Changes for v5.15-rc1

The bulk of these changes is a more modern ABI that can be efficiently
used on newer SoCs as well as older ones. The userspace parts for this
are available here:

- libdrm support: https://gitlab.freedesktop.org/tagr/drm/-/commits/drm-tegra-uabi-v8
- VAAPI driver: https://github.com/cyndis/vaapi-tegra-driver

In addition, existing userspace from the grate reverse-engineering
project has been updated to use this new ABI:

- X11 driver: https://github.com/grate-driver/xf86-video-opentegra
- 3D driver: https://github.com/grate-driver/grate

Other than that, there's also support for display memory bandwidth
management for various generations and a bit of cleanup.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thierry Reding <thierry.reding@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210813163616.2822355-1-thierry.reding@gmail.com

+2888 -203
+1
drivers/gpu/drm/tegra/Kconfig
··· 9 9 select DRM_MIPI_DSI 10 10 select DRM_PANEL 11 11 select TEGRA_HOST1X 12 + select INTERCONNECT 12 13 select IOMMU_IOVA 13 14 select CEC_CORE if CEC_NOTIFIER 14 15 help
+3
drivers/gpu/drm/tegra/Makefile
··· 3 3 4 4 tegra-drm-y := \ 5 5 drm.o \ 6 + uapi.o \ 7 + submit.o \ 8 + firewall.o \ 6 9 gem.o \ 7 10 fb.o \ 8 11 dp.o \
+356 -2
drivers/gpu/drm/tegra/dc.c
··· 8 8 #include <linux/debugfs.h> 9 9 #include <linux/delay.h> 10 10 #include <linux/iommu.h> 11 + #include <linux/interconnect.h> 11 12 #include <linux/module.h> 12 13 #include <linux/of_device.h> 13 14 #include <linux/pm_runtime.h> ··· 619 618 struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc); 620 619 int err; 621 620 621 + plane_state->peak_memory_bandwidth = 0; 622 + plane_state->avg_memory_bandwidth = 0; 623 + 622 624 /* no need for further checks if the plane is being disabled */ 623 - if (!new_plane_state->crtc) 625 + if (!new_plane_state->crtc) { 626 + plane_state->total_peak_memory_bandwidth = 0; 624 627 return 0; 628 + } 625 629 626 630 err = tegra_plane_format(new_plane_state->fb->format->format, 627 631 &plane_state->format, ··· 814 808 formats = dc->soc->primary_formats; 815 809 modifiers = dc->soc->modifiers; 816 810 811 + err = tegra_plane_interconnect_init(plane); 812 + if (err) { 813 + kfree(plane); 814 + return ERR_PTR(err); 815 + } 816 + 817 817 err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, 818 818 &tegra_plane_funcs, formats, 819 819 num_formats, modifiers, type, NULL); ··· 857 845 { 858 846 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 859 847 plane); 848 + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); 860 849 struct tegra_plane *tegra = to_tegra_plane(plane); 861 850 int err; 862 851 852 + plane_state->peak_memory_bandwidth = 0; 853 + plane_state->avg_memory_bandwidth = 0; 854 + 863 855 /* no need for further checks if the plane is being disabled */ 864 - if (!new_plane_state->crtc) 856 + if (!new_plane_state->crtc) { 857 + plane_state->total_peak_memory_bandwidth = 0; 865 858 return 0; 859 + } 866 860 867 861 /* scaling not supported for cursor */ 868 862 if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) || ··· 1048 1030 if (!dc->soc->has_nvdisplay) { 1049 1031 num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats); 1050 1032 
formats = tegra_legacy_cursor_plane_formats; 1033 + 1034 + err = tegra_plane_interconnect_init(plane); 1035 + if (err) { 1036 + kfree(plane); 1037 + return ERR_PTR(err); 1038 + } 1051 1039 } else { 1052 1040 num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); 1053 1041 formats = tegra_cursor_plane_formats; ··· 1172 1148 1173 1149 num_formats = dc->soc->num_overlay_formats; 1174 1150 formats = dc->soc->overlay_formats; 1151 + 1152 + err = tegra_plane_interconnect_init(plane); 1153 + if (err) { 1154 + kfree(plane); 1155 + return ERR_PTR(err); 1156 + } 1175 1157 1176 1158 if (!cursor) 1177 1159 type = DRM_PLANE_TYPE_OVERLAY; ··· 1602 1572 seq_printf(s, "underflow: %lu\n", dc->stats.underflow); 1603 1573 seq_printf(s, "overflow: %lu\n", dc->stats.overflow); 1604 1574 1575 + seq_printf(s, "frames total: %lu\n", dc->stats.frames_total); 1576 + seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total); 1577 + seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total); 1578 + seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total); 1579 + 1605 1580 return 0; 1606 1581 } 1607 1582 ··· 1839 1804 return -ETIMEDOUT; 1840 1805 } 1841 1806 1807 + static void 1808 + tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, 1809 + struct drm_atomic_state *state, 1810 + bool prepare_bandwidth_transition) 1811 + { 1812 + const struct tegra_plane_state *old_tegra_state, *new_tegra_state; 1813 + const struct tegra_dc_state *old_dc_state, *new_dc_state; 1814 + u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw; 1815 + const struct drm_plane_state *old_plane_state; 1816 + const struct drm_crtc_state *old_crtc_state; 1817 + struct tegra_dc_window window, old_window; 1818 + struct tegra_dc *dc = to_tegra_dc(crtc); 1819 + struct tegra_plane *tegra; 1820 + struct drm_plane *plane; 1821 + 1822 + if (dc->soc->has_nvdisplay) 1823 + return; 1824 + 1825 + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); 1826 + old_dc_state = 
to_const_dc_state(old_crtc_state); 1827 + new_dc_state = to_const_dc_state(crtc->state); 1828 + 1829 + if (!crtc->state->active) { 1830 + if (!old_crtc_state->active) 1831 + return; 1832 + 1833 + /* 1834 + * When CRTC is disabled on DPMS, the state of attached planes 1835 + * is kept unchanged. Hence we need to enforce removal of the 1836 + * bandwidths from the ICC paths. 1837 + */ 1838 + drm_atomic_crtc_for_each_plane(plane, crtc) { 1839 + tegra = to_tegra_plane(plane); 1840 + 1841 + icc_set_bw(tegra->icc_mem, 0, 0); 1842 + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); 1843 + } 1844 + 1845 + return; 1846 + } 1847 + 1848 + for_each_old_plane_in_state(old_crtc_state->state, plane, 1849 + old_plane_state, i) { 1850 + old_tegra_state = to_const_tegra_plane_state(old_plane_state); 1851 + new_tegra_state = to_const_tegra_plane_state(plane->state); 1852 + tegra = to_tegra_plane(plane); 1853 + 1854 + /* 1855 + * We're iterating over the global atomic state and it contains 1856 + * planes from another CRTC, hence we need to filter out the 1857 + * planes unrelated to this CRTC. 1858 + */ 1859 + if (tegra->dc != dc) 1860 + continue; 1861 + 1862 + new_avg_bw = new_tegra_state->avg_memory_bandwidth; 1863 + old_avg_bw = old_tegra_state->avg_memory_bandwidth; 1864 + 1865 + new_peak_bw = new_tegra_state->total_peak_memory_bandwidth; 1866 + old_peak_bw = old_tegra_state->total_peak_memory_bandwidth; 1867 + 1868 + /* 1869 + * See the comment related to !crtc->state->active above, 1870 + * which explains why bandwidths need to be updated when 1871 + * CRTC is turning ON. 
1872 + */ 1873 + if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw && 1874 + old_crtc_state->active) 1875 + continue; 1876 + 1877 + window.src.h = drm_rect_height(&plane->state->src) >> 16; 1878 + window.dst.h = drm_rect_height(&plane->state->dst); 1879 + 1880 + old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16; 1881 + old_window.dst.h = drm_rect_height(&old_plane_state->dst); 1882 + 1883 + /* 1884 + * During the preparation phase (atomic_begin), the memory 1885 + * freq should go high before the DC changes are committed 1886 + * if bandwidth requirement goes up, otherwise memory freq 1887 + * should stay high if BW requirement goes down. The 1888 + * opposite applies to the completion phase (post_commit). 1889 + */ 1890 + if (prepare_bandwidth_transition) { 1891 + new_avg_bw = max(old_avg_bw, new_avg_bw); 1892 + new_peak_bw = max(old_peak_bw, new_peak_bw); 1893 + 1894 + if (tegra_plane_use_vertical_filtering(tegra, &old_window)) 1895 + window = old_window; 1896 + } 1897 + 1898 + icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw); 1899 + 1900 + if (tegra_plane_use_vertical_filtering(tegra, &window)) 1901 + icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw); 1902 + else 1903 + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); 1904 + } 1905 + } 1906 + 1842 1907 static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, 1843 1908 struct drm_atomic_state *state) 1844 1909 { ··· 2120 1985 { 2121 1986 unsigned long flags; 2122 1987 1988 + tegra_crtc_update_memory_bandwidth(crtc, state, true); 1989 + 2123 1990 if (crtc->state->event) { 2124 1991 spin_lock_irqsave(&crtc->dev->event_lock, flags); 2125 1992 ··· 2154 2017 value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); 2155 2018 } 2156 2019 2020 + static bool tegra_plane_is_cursor(const struct drm_plane_state *state) 2021 + { 2022 + const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc; 2023 + const struct drm_format_info *fmt = state->fb->format; 2024 + unsigned int src_w = 
drm_rect_width(&state->src) >> 16; 2025 + unsigned int dst_w = drm_rect_width(&state->dst); 2026 + 2027 + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) 2028 + return false; 2029 + 2030 + if (soc->supports_cursor) 2031 + return true; 2032 + 2033 + if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256) 2034 + return false; 2035 + 2036 + return true; 2037 + } 2038 + 2039 + static unsigned long 2040 + tegra_plane_overlap_mask(struct drm_crtc_state *state, 2041 + const struct drm_plane_state *plane_state) 2042 + { 2043 + const struct drm_plane_state *other_state; 2044 + const struct tegra_plane *tegra; 2045 + unsigned long overlap_mask = 0; 2046 + struct drm_plane *plane; 2047 + struct drm_rect rect; 2048 + 2049 + if (!plane_state->visible || !plane_state->fb) 2050 + return 0; 2051 + 2052 + /* 2053 + * Data-prefetch FIFO will easily help to overcome temporal memory 2054 + * pressure if other plane overlaps with the cursor plane. 2055 + */ 2056 + if (tegra_plane_is_cursor(plane_state)) 2057 + return 0; 2058 + 2059 + drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) { 2060 + rect = plane_state->dst; 2061 + 2062 + tegra = to_tegra_plane(other_state->plane); 2063 + 2064 + if (!other_state->visible || !other_state->fb) 2065 + continue; 2066 + 2067 + /* 2068 + * Ignore cursor plane overlaps because it's not practical to 2069 + * assume that it contributes to the bandwidth in overlapping 2070 + * area if window width is small. 
2071 + */ 2072 + if (tegra_plane_is_cursor(other_state)) 2073 + continue; 2074 + 2075 + if (drm_rect_intersect(&rect, &other_state->dst)) 2076 + overlap_mask |= BIT(tegra->index); 2077 + } 2078 + 2079 + return overlap_mask; 2080 + } 2081 + 2082 + static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc, 2083 + struct drm_atomic_state *state) 2084 + { 2085 + ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask; 2086 + u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {}; 2087 + bool all_planes_overlap_simultaneously = true; 2088 + const struct tegra_plane_state *tegra_state; 2089 + const struct drm_plane_state *plane_state; 2090 + struct tegra_dc *dc = to_tegra_dc(crtc); 2091 + const struct drm_crtc_state *old_state; 2092 + struct drm_crtc_state *new_state; 2093 + struct tegra_plane *tegra; 2094 + struct drm_plane *plane; 2095 + 2096 + /* 2097 + * The nv-display uses shared planes. The algorithm below assumes 2098 + * maximum 3 planes per-CRTC, this assumption isn't applicable to 2099 + * the nv-display. Note that T124 support has additional windows, 2100 + * but currently they aren't supported by the driver. 2101 + */ 2102 + if (dc->soc->has_nvdisplay) 2103 + return 0; 2104 + 2105 + new_state = drm_atomic_get_new_crtc_state(state, crtc); 2106 + old_state = drm_atomic_get_old_crtc_state(state, crtc); 2107 + 2108 + /* 2109 + * For overlapping planes pixel's data is fetched for each plane at 2110 + * the same time, hence bandwidths are accumulated in this case. 2111 + * This needs to be taken into account for calculating total bandwidth 2112 + * consumed by all planes. 2113 + * 2114 + * Here we get the overlapping state of each plane, which is a 2115 + * bitmask of plane indices telling with what planes there is an 2116 + * overlap. Note that bitmask[plane] includes BIT(plane) in order 2117 + * to make further code nicer and simpler. 
 */ 2119 + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { 2120 + tegra_state = to_const_tegra_plane_state(plane_state); 2121 + tegra = to_tegra_plane(plane); 2122 + 2123 + if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM)) 2124 + return -EINVAL; 2125 + 2126 + plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth; 2127 + mask = tegra_plane_overlap_mask(new_state, plane_state); 2128 + overlap_mask[tegra->index] = mask; 2129 + 2130 + if (hweight_long(mask) != 3) 2131 + all_planes_overlap_simultaneously = false; 2132 + } 2133 + 2134 + /* 2135 + * Then we calculate maximum bandwidth of each plane state. 2136 + * The bandwidth includes the plane BW + BW of the "simultaneously" 2137 + * overlapping planes, where "simultaneously" means areas where DC 2138 + * fetches from the planes simultaneously during the scan-out process. 2139 + * 2140 + * For example, if plane A overlaps with planes B and C, but B and C 2141 + * don't overlap, then the peak bandwidth will be either in area where 2142 + * A-and-B or A-and-C planes overlap. 2143 + * 2144 + * The plane_peak_bw[] contains peak memory bandwidth values of 2145 + * each plane, this information is needed by interconnect provider 2146 + * in order to set up latency allowance based on the peak BW, see 2147 + * tegra_crtc_update_memory_bandwidth(). 2148 + */ 2149 + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { 2150 + u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0; 2151 + 2152 + /* 2153 + * Note that plane's atomic check doesn't touch the 2154 + * total_peak_memory_bandwidth of enabled plane, hence the 2155 + * current state contains the old bandwidth state from the 2156 + * previous CRTC commit. 
2157 + */ 2158 + tegra_state = to_const_tegra_plane_state(plane_state); 2159 + tegra = to_tegra_plane(plane); 2160 + 2161 + for_each_set_bit(i, &overlap_mask[tegra->index], 3) { 2162 + if (i == tegra->index) 2163 + continue; 2164 + 2165 + if (all_planes_overlap_simultaneously) 2166 + overlap_bw += plane_peak_bw[i]; 2167 + else 2168 + overlap_bw = max(overlap_bw, plane_peak_bw[i]); 2169 + } 2170 + 2171 + new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw; 2172 + old_peak_bw = tegra_state->total_peak_memory_bandwidth; 2173 + 2174 + /* 2175 + * If plane's peak bandwidth changed (for example plane isn't 2176 + * overlapped anymore) and plane isn't in the atomic state, 2177 + * then add plane to the state in order to have the bandwidth 2178 + * updated. 2179 + */ 2180 + if (old_peak_bw != new_peak_bw) { 2181 + struct tegra_plane_state *new_tegra_state; 2182 + struct drm_plane_state *new_plane_state; 2183 + 2184 + new_plane_state = drm_atomic_get_plane_state(state, plane); 2185 + if (IS_ERR(new_plane_state)) 2186 + return PTR_ERR(new_plane_state); 2187 + 2188 + new_tegra_state = to_tegra_plane_state(new_plane_state); 2189 + new_tegra_state->total_peak_memory_bandwidth = new_peak_bw; 2190 + } 2191 + } 2192 + 2193 + return 0; 2194 + } 2195 + 2196 + static int tegra_crtc_atomic_check(struct drm_crtc *crtc, 2197 + struct drm_atomic_state *state) 2198 + { 2199 + int err; 2200 + 2201 + err = tegra_crtc_calculate_memory_bandwidth(crtc, state); 2202 + if (err) 2203 + return err; 2204 + 2205 + return 0; 2206 + } 2207 + 2208 + void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, 2209 + struct drm_atomic_state *state) 2210 + { 2211 + /* 2212 + * Display bandwidth is allowed to go down only once hardware state 2213 + * is known to be armed, i.e. state was committed and VBLANK event 2214 + * received. 
2215 + */ 2216 + tegra_crtc_update_memory_bandwidth(crtc, state, false); 2217 + } 2218 + 2157 2219 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = { 2220 + .atomic_check = tegra_crtc_atomic_check, 2158 2221 .atomic_begin = tegra_crtc_atomic_begin, 2159 2222 .atomic_flush = tegra_crtc_atomic_flush, 2160 2223 .atomic_enable = tegra_crtc_atomic_enable, ··· 2373 2036 /* 2374 2037 dev_dbg(dc->dev, "%s(): frame end\n", __func__); 2375 2038 */ 2039 + dc->stats.frames_total++; 2376 2040 dc->stats.frames++; 2377 2041 } 2378 2042 ··· 2382 2044 dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); 2383 2045 */ 2384 2046 drm_crtc_handle_vblank(&dc->base); 2047 + dc->stats.vblank_total++; 2385 2048 dc->stats.vblank++; 2386 2049 } 2387 2050 ··· 2390 2051 /* 2391 2052 dev_dbg(dc->dev, "%s(): underflow\n", __func__); 2392 2053 */ 2054 + dc->stats.underflow_total++; 2393 2055 dc->stats.underflow++; 2394 2056 } 2395 2057 ··· 2398 2058 /* 2399 2059 dev_dbg(dc->dev, "%s(): overflow\n", __func__); 2400 2060 */ 2061 + dc->stats.overflow_total++; 2401 2062 dc->stats.overflow++; 2402 2063 } 2403 2064 2404 2065 if (status & HEAD_UF_INT) { 2405 2066 dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__); 2067 + dc->stats.underflow_total++; 2406 2068 dc->stats.underflow++; 2407 2069 } 2408 2070 ··· 2685 2343 .overlay_formats = tegra20_overlay_formats, 2686 2344 .modifiers = tegra20_modifiers, 2687 2345 .has_win_a_without_filters = true, 2346 + .has_win_b_vfilter_mem_client = true, 2688 2347 .has_win_c_without_vert_filter = true, 2348 + .plane_tiled_memory_bandwidth_x2 = false, 2689 2349 }; 2690 2350 2691 2351 static const struct tegra_dc_soc_info tegra30_dc_soc_info = { ··· 2707 2363 .overlay_formats = tegra20_overlay_formats, 2708 2364 .modifiers = tegra20_modifiers, 2709 2365 .has_win_a_without_filters = false, 2366 + .has_win_b_vfilter_mem_client = true, 2710 2367 .has_win_c_without_vert_filter = false, 2368 + .plane_tiled_memory_bandwidth_x2 = true, 2711 
2369 }; 2712 2370 2713 2371 static const struct tegra_dc_soc_info tegra114_dc_soc_info = { ··· 2729 2383 .overlay_formats = tegra114_overlay_formats, 2730 2384 .modifiers = tegra20_modifiers, 2731 2385 .has_win_a_without_filters = false, 2386 + .has_win_b_vfilter_mem_client = false, 2732 2387 .has_win_c_without_vert_filter = false, 2388 + .plane_tiled_memory_bandwidth_x2 = true, 2733 2389 }; 2734 2390 2735 2391 static const struct tegra_dc_soc_info tegra124_dc_soc_info = { ··· 2751 2403 .overlay_formats = tegra124_overlay_formats, 2752 2404 .modifiers = tegra124_modifiers, 2753 2405 .has_win_a_without_filters = false, 2406 + .has_win_b_vfilter_mem_client = false, 2754 2407 .has_win_c_without_vert_filter = false, 2408 + .plane_tiled_memory_bandwidth_x2 = false, 2755 2409 }; 2756 2410 2757 2411 static const struct tegra_dc_soc_info tegra210_dc_soc_info = { ··· 2773 2423 .overlay_formats = tegra114_overlay_formats, 2774 2424 .modifiers = tegra124_modifiers, 2775 2425 .has_win_a_without_filters = false, 2426 + .has_win_b_vfilter_mem_client = false, 2776 2427 .has_win_c_without_vert_filter = false, 2428 + .plane_tiled_memory_bandwidth_x2 = false, 2777 2429 }; 2778 2430 2779 2431 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { ··· 2825 2473 .has_nvdisplay = true, 2826 2474 .wgrps = tegra186_dc_wgrps, 2827 2475 .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), 2476 + .plane_tiled_memory_bandwidth_x2 = false, 2828 2477 }; 2829 2478 2830 2479 static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { ··· 2875 2522 .has_nvdisplay = true, 2876 2523 .wgrps = tegra194_dc_wgrps, 2877 2524 .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), 2525 + .plane_tiled_memory_bandwidth_x2 = false, 2878 2526 }; 2879 2527 2880 2528 static const struct of_device_id tegra_dc_of_match[] = {
+17
drivers/gpu/drm/tegra/dc.h
··· 15 15 16 16 struct tegra_output; 17 17 18 + #define TEGRA_DC_LEGACY_PLANES_NUM 7 19 + 18 20 struct tegra_dc_state { 19 21 struct drm_crtc_state base; 20 22 ··· 35 33 return NULL; 36 34 } 37 35 36 + static inline const struct tegra_dc_state * 37 + to_const_dc_state(const struct drm_crtc_state *state) 38 + { 39 + return to_dc_state((struct drm_crtc_state *)state); 40 + } 41 + 38 42 struct tegra_dc_stats { 39 43 unsigned long frames; 40 44 unsigned long vblank; 41 45 unsigned long underflow; 42 46 unsigned long overflow; 47 + 48 + unsigned long frames_total; 49 + unsigned long vblank_total; 50 + unsigned long underflow_total; 51 + unsigned long overflow_total; 43 52 }; 44 53 45 54 struct tegra_windowgroup_soc { ··· 79 66 unsigned int num_overlay_formats; 80 67 const u64 *modifiers; 81 68 bool has_win_a_without_filters; 69 + bool has_win_b_vfilter_mem_client; 82 70 bool has_win_c_without_vert_filter; 71 + bool plane_tiled_memory_bandwidth_x2; 83 72 }; 84 73 85 74 struct tegra_dc { ··· 167 152 struct drm_crtc_state *crtc_state, 168 153 struct clk *clk, unsigned long pclk, 169 154 unsigned int div); 155 + void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, 156 + struct drm_atomic_state *state); 170 157 171 158 /* from rgb.c */ 172 159 int tegra_dc_rgb_probe(struct tegra_dc *dc);
+62 -36
drivers/gpu/drm/tegra/drm.c
··· 21 21 #include <drm/drm_prime.h> 22 22 #include <drm/drm_vblank.h> 23 23 24 + #include "dc.h" 24 25 #include "drm.h" 25 26 #include "gem.h" 27 + #include "uapi.h" 26 28 27 29 #define DRIVER_NAME "tegra" 28 30 #define DRIVER_DESC "NVIDIA Tegra graphics" 29 31 #define DRIVER_DATE "20120330" 30 - #define DRIVER_MAJOR 0 32 + #define DRIVER_MAJOR 1 31 33 #define DRIVER_MINOR 0 32 34 #define DRIVER_PATCHLEVEL 0 33 35 34 36 #define CARVEOUT_SZ SZ_64M 35 37 #define CDMA_GATHER_FETCHES_MAX_NB 16383 36 - 37 - struct tegra_drm_file { 38 - struct idr contexts; 39 - struct mutex lock; 40 - }; 41 38 42 39 static int tegra_atomic_check(struct drm_device *drm, 43 40 struct drm_atomic_state *state) ··· 57 60 .atomic_commit = drm_atomic_helper_commit, 58 61 }; 59 62 63 + static void tegra_atomic_post_commit(struct drm_device *drm, 64 + struct drm_atomic_state *old_state) 65 + { 66 + struct drm_crtc_state *old_crtc_state __maybe_unused; 67 + struct drm_crtc *crtc; 68 + unsigned int i; 69 + 70 + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) 71 + tegra_crtc_atomic_post_commit(crtc, old_state); 72 + } 73 + 60 74 static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) 61 75 { 62 76 struct drm_device *drm = old_state->dev; ··· 87 79 } else { 88 80 drm_atomic_helper_commit_tail_rpm(old_state); 89 81 } 82 + 83 + tegra_atomic_post_commit(drm, old_state); 90 84 } 91 85 92 86 static const struct drm_mode_config_helper_funcs ··· 104 94 if (!fpriv) 105 95 return -ENOMEM; 106 96 107 - idr_init_base(&fpriv->contexts, 1); 97 + idr_init_base(&fpriv->legacy_contexts, 1); 98 + xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1); 99 + xa_init(&fpriv->syncpoints); 108 100 mutex_init(&fpriv->lock); 109 101 filp->driver_priv = fpriv; 110 102 ··· 117 105 { 118 106 context->client->ops->close_channel(context); 119 107 kfree(context); 120 - } 121 - 122 - static struct host1x_bo * 123 - host1x_bo_lookup(struct drm_file *file, u32 handle) 124 - { 125 - struct drm_gem_object 
*gem; 126 - struct tegra_bo *bo; 127 - 128 - gem = drm_gem_object_lookup(file, handle); 129 - if (!gem) 130 - return NULL; 131 - 132 - bo = to_tegra_bo(gem); 133 - return &bo->base; 134 108 } 135 109 136 110 static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, ··· 149 151 150 152 dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; 151 153 152 - dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf); 154 + dest->cmdbuf.bo = tegra_gem_lookup(file, cmdbuf); 153 155 if (!dest->cmdbuf.bo) 154 156 return -ENOENT; 155 157 156 - dest->target.bo = host1x_bo_lookup(file, target); 158 + dest->target.bo = tegra_gem_lookup(file, target); 157 159 if (!dest->target.bo) 158 160 return -ENOENT; 159 161 ··· 191 193 return -EINVAL; 192 194 193 195 job = host1x_job_alloc(context->channel, args->num_cmdbufs, 194 - args->num_relocs); 196 + args->num_relocs, false); 195 197 if (!job) 196 198 return -ENOMEM; 197 199 ··· 199 201 job->client = client; 200 202 job->class = client->class; 201 203 job->serialize = true; 204 + job->syncpt_recovery = true; 202 205 203 206 /* 204 207 * Track referenced BOs so that they can be unreferenced after the ··· 236 237 goto fail; 237 238 } 238 239 239 - bo = host1x_bo_lookup(file, cmdbuf.handle); 240 + bo = tegra_gem_lookup(file, cmdbuf.handle); 240 241 if (!bo) { 241 242 err = -ENOENT; 242 243 goto fail; ··· 431 432 if (err < 0) 432 433 return err; 433 434 434 - err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL); 435 + err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL); 435 436 if (err < 0) { 436 437 client->ops->close_channel(context); 437 438 return err; ··· 486 487 487 488 mutex_lock(&fpriv->lock); 488 489 489 - context = idr_find(&fpriv->contexts, args->context); 490 + context = idr_find(&fpriv->legacy_contexts, args->context); 490 491 if (!context) { 491 492 err = -EINVAL; 492 493 goto unlock; 493 494 } 494 495 495 - idr_remove(&fpriv->contexts, context->id); 496 + idr_remove(&fpriv->legacy_contexts, context->id); 
496 497 tegra_drm_context_free(context); 497 498 498 499 unlock: ··· 511 512 512 513 mutex_lock(&fpriv->lock); 513 514 514 - context = idr_find(&fpriv->contexts, args->context); 515 + context = idr_find(&fpriv->legacy_contexts, args->context); 515 516 if (!context) { 516 517 err = -ENODEV; 517 518 goto unlock; ··· 540 541 541 542 mutex_lock(&fpriv->lock); 542 543 543 - context = idr_find(&fpriv->contexts, args->context); 544 + context = idr_find(&fpriv->legacy_contexts, args->context); 544 545 if (!context) { 545 546 err = -ENODEV; 546 547 goto unlock; ··· 565 566 566 567 mutex_lock(&fpriv->lock); 567 568 568 - context = idr_find(&fpriv->contexts, args->context); 569 + context = idr_find(&fpriv->legacy_contexts, args->context); 569 570 if (!context) { 570 571 err = -ENODEV; 571 572 goto unlock; ··· 734 735 735 736 static const struct drm_ioctl_desc tegra_drm_ioctls[] = { 736 737 #ifdef CONFIG_DRM_TEGRA_STAGING 737 - DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, 738 + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_OPEN, tegra_drm_ioctl_channel_open, 738 739 DRM_RENDER_ALLOW), 739 - DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, 740 + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_CLOSE, tegra_drm_ioctl_channel_close, 740 741 DRM_RENDER_ALLOW), 742 + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_MAP, tegra_drm_ioctl_channel_map, 743 + DRM_RENDER_ALLOW), 744 + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, 745 + DRM_RENDER_ALLOW), 746 + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit, 747 + DRM_RENDER_ALLOW), 748 + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate, 749 + DRM_RENDER_ALLOW), 750 + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free, 751 + DRM_RENDER_ALLOW), 752 + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_WAIT, tegra_drm_ioctl_syncpoint_wait, 753 + DRM_RENDER_ALLOW), 754 + 755 + DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW), 756 + DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, 
tegra_gem_mmap, DRM_RENDER_ALLOW), 741 757 DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read, 742 758 DRM_RENDER_ALLOW), 743 759 DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr, ··· 806 792 struct tegra_drm_file *fpriv = file->driver_priv; 807 793 808 794 mutex_lock(&fpriv->lock); 809 - idr_for_each(&fpriv->contexts, tegra_drm_context_cleanup, NULL); 795 + idr_for_each(&fpriv->legacy_contexts, tegra_drm_context_cleanup, NULL); 796 + tegra_drm_uapi_close_file(fpriv); 810 797 mutex_unlock(&fpriv->lock); 811 798 812 - idr_destroy(&fpriv->contexts); 799 + idr_destroy(&fpriv->legacy_contexts); 813 800 mutex_destroy(&fpriv->lock); 814 801 kfree(fpriv); 815 802 } ··· 868 853 869 854 static const struct drm_driver tegra_drm_driver = { 870 855 .driver_features = DRIVER_MODESET | DRIVER_GEM | 871 - DRIVER_ATOMIC | DRIVER_RENDER, 856 + DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ, 872 857 .open = tegra_drm_open, 873 858 .postclose = tegra_drm_postclose, 874 859 .lastclose = drm_fb_helper_lastclose, ··· 898 883 int tegra_drm_register_client(struct tegra_drm *tegra, 899 884 struct tegra_drm_client *client) 900 885 { 886 + /* 887 + * When MLOCKs are implemented, change to allocate a shared channel 888 + * only when MLOCKs are disabled. 889 + */ 890 + client->shared_channel = host1x_channel_request(&client->base); 891 + if (!client->shared_channel) 892 + return -EBUSY; 893 + 901 894 mutex_lock(&tegra->clients_lock); 902 895 list_add_tail(&client->list, &tegra->clients); 903 896 client->drm = tegra; ··· 921 898 list_del_init(&client->list); 922 899 client->drm = NULL; 923 900 mutex_unlock(&tegra->clients_lock); 901 + 902 + if (client->shared_channel) 903 + host1x_channel_put(client->shared_channel); 924 904 925 905 return 0; 926 906 }
+12
drivers/gpu/drm/tegra/drm.h
··· 64 64 struct tegra_display_hub *hub; 65 65 }; 66 66 67 + static inline struct host1x *tegra_drm_to_host1x(struct tegra_drm *tegra) 68 + { 69 + return dev_get_drvdata(tegra->drm->dev->parent); 70 + } 71 + 67 72 struct tegra_drm_client; 68 73 69 74 struct tegra_drm_context { 70 75 struct tegra_drm_client *client; 71 76 struct host1x_channel *channel; 77 + 78 + /* Only used by legacy UAPI. */ 72 79 unsigned int id; 80 + 81 + /* Only used by new UAPI. */ 82 + struct xarray mappings; 73 83 }; 74 84 75 85 struct tegra_drm_client_ops { ··· 101 91 struct host1x_client base; 102 92 struct list_head list; 103 93 struct tegra_drm *drm; 94 + struct host1x_channel *shared_channel; 104 95 96 + /* Set by driver */ 105 97 unsigned int version; 106 98 const struct tegra_drm_client_ops *ops; 107 99 };
+254
drivers/gpu/drm/tegra/firewall.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2010-2020 NVIDIA Corporation */ 3 + 4 + #include "drm.h" 5 + #include "submit.h" 6 + #include "uapi.h" 7 + 8 + struct tegra_drm_firewall { 9 + struct tegra_drm_submit_data *submit; 10 + struct tegra_drm_client *client; 11 + u32 *data; 12 + u32 pos; 13 + u32 end; 14 + u32 class; 15 + }; 16 + 17 + static int fw_next(struct tegra_drm_firewall *fw, u32 *word) 18 + { 19 + if (fw->pos == fw->end) 20 + return -EINVAL; 21 + 22 + *word = fw->data[fw->pos++]; 23 + 24 + return 0; 25 + } 26 + 27 + static bool fw_check_addr_valid(struct tegra_drm_firewall *fw, u32 offset) 28 + { 29 + u32 i; 30 + 31 + for (i = 0; i < fw->submit->num_used_mappings; i++) { 32 + struct tegra_drm_mapping *m = fw->submit->used_mappings[i].mapping; 33 + 34 + if (offset >= m->iova && offset <= m->iova_end) 35 + return true; 36 + } 37 + 38 + return false; 39 + } 40 + 41 + static int fw_check_reg(struct tegra_drm_firewall *fw, u32 offset) 42 + { 43 + bool is_addr; 44 + u32 word; 45 + int err; 46 + 47 + err = fw_next(fw, &word); 48 + if (err) 49 + return err; 50 + 51 + if (!fw->client->ops->is_addr_reg) 52 + return 0; 53 + 54 + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, 55 + offset); 56 + 57 + if (!is_addr) 58 + return 0; 59 + 60 + if (!fw_check_addr_valid(fw, word)) 61 + return -EINVAL; 62 + 63 + return 0; 64 + } 65 + 66 + static int fw_check_regs_seq(struct tegra_drm_firewall *fw, u32 offset, 67 + u32 count, bool incr) 68 + { 69 + u32 i; 70 + 71 + for (i = 0; i < count; i++) { 72 + if (fw_check_reg(fw, offset)) 73 + return -EINVAL; 74 + 75 + if (incr) 76 + offset++; 77 + } 78 + 79 + return 0; 80 + } 81 + 82 + static int fw_check_regs_mask(struct tegra_drm_firewall *fw, u32 offset, 83 + u16 mask) 84 + { 85 + unsigned long bmask = mask; 86 + unsigned int bit; 87 + 88 + for_each_set_bit(bit, &bmask, 16) { 89 + if (fw_check_reg(fw, offset+bit)) 90 + return -EINVAL; 91 + } 92 + 93 + return 0; 94 + } 95 + 96 + 
/*
 * IMM writes an immediate constant, which can never be a valid buffer
 * pointer, so an IMM write to an address register is always rejected.
 */
static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset)
{
	bool is_addr;

	/*
	 * Clients without an is_addr_reg() callback cannot classify
	 * registers; accept the write, exactly as fw_check_reg() does.
	 * (The original dereferenced the callback unconditionally and
	 * would oops for such clients on an IMM opcode.)
	 */
	if (!fw->client->ops->is_addr_reg)
		return 0;

	is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class,
					       offset);
	if (is_addr)
		return -EINVAL;

	return 0;
}

/*
 * Check whether the job may select HW class @class. Clients that don't
 * implement is_valid_class() only accept their own base class.
 */
static int fw_check_class(struct tegra_drm_firewall *fw, u32 class)
{
	if (!fw->client->ops->is_valid_class) {
		if (class == fw->client->base.class)
			return 0;
		else
			return -EINVAL;
	}

	if (!fw->client->ops->is_valid_class(class))
		return -EINVAL;

	return 0;
}

/* Host1x channel opcodes (upper nibble of a command word). */
enum {
	HOST1X_OPCODE_SETCLASS  = 0x00,
	HOST1X_OPCODE_INCR      = 0x01,
	HOST1X_OPCODE_NONINCR   = 0x02,
	HOST1X_OPCODE_MASK      = 0x03,
	HOST1X_OPCODE_IMM       = 0x04,
	HOST1X_OPCODE_RESTART   = 0x05,
	HOST1X_OPCODE_GATHER    = 0x06,
	HOST1X_OPCODE_SETSTRMID = 0x07,
	HOST1X_OPCODE_SETAPPID  = 0x08,
	HOST1X_OPCODE_SETPYLD   = 0x09,
	HOST1X_OPCODE_INCR_W    = 0x0a,
	HOST1X_OPCODE_NONINCR_W = 0x0b,
	HOST1X_OPCODE_GATHER_W  = 0x0c,
	HOST1X_OPCODE_RESTART_W = 0x0d,
	HOST1X_OPCODE_EXTEND    = 0x0e,
};

/**
 * tegra_drm_fw_validate - validate a gather's command stream
 * @client: engine the job targets
 * @data: gather data
 * @start: first word of the gather within @data
 * @words: number of words to validate
 * @submit: per-job data holding the mappings the job may access
 * @job_class: in/out; HW class selected when the gather starts, updated
 *             by any SETCLASS opcode so it carries across gathers
 *
 * Returns 0 if the stream is acceptable, -EINVAL otherwise (with a
 * ratelimit-free dev_warn describing the offending opcode).
 */
int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
			  u32 words, struct tegra_drm_submit_data *submit,
			  u32 *job_class)
{
	struct tegra_drm_firewall fw = {
		.submit = submit,
		.client = client,
		.data = data,
		.pos = start,
		.end = start + words,
		.class = *job_class,
	};
	bool payload_valid = false;
	u32 payload;
	int err;

	while (fw.pos != fw.end) {
		u32 word, opcode, offset, count, mask, class;

		err = fw_next(&fw, &word);
		if (err)
			return err;

		opcode = (word & 0xf0000000) >> 28;

		switch (opcode) {
		case HOST1X_OPCODE_SETCLASS:
			offset = (word >> 16) & 0xfff;
			mask = word & 0x3f;
			class = (word >> 6) & 0x3ff;
			/* record the class even if invalid, so later
			 * diagnostics report what the stream selected */
			err = fw_check_class(&fw, class);
			fw.class = class;
			*job_class = class;
			if (!err)
				err = fw_check_regs_mask(&fw, offset, mask);
			if (err)
				dev_warn(client->base.dev,
					 "illegal SETCLASS(offset=0x%x, mask=0x%x, class=0x%x) at word %u",
					 offset, mask, class, fw.pos-1);
			break;
		case HOST1X_OPCODE_INCR:
			offset = (word >> 16) & 0xfff;
			count = word & 0xffff;
			err = fw_check_regs_seq(&fw, offset, count, true);
			if (err)
				dev_warn(client->base.dev,
					 "illegal INCR(offset=0x%x, count=%u) in class 0x%x at word %u",
					 offset, count, fw.class, fw.pos-1);
			break;
		case HOST1X_OPCODE_NONINCR:
			offset = (word >> 16) & 0xfff;
			count = word & 0xffff;
			err = fw_check_regs_seq(&fw, offset, count, false);
			if (err)
				dev_warn(client->base.dev,
					 "illegal NONINCR(offset=0x%x, count=%u) in class 0x%x at word %u",
					 offset, count, fw.class, fw.pos-1);
			break;
		case HOST1X_OPCODE_MASK:
			offset = (word >> 16) & 0xfff;
			mask = word & 0xffff;
			err = fw_check_regs_mask(&fw, offset, mask);
			if (err)
				dev_warn(client->base.dev,
					 "illegal MASK(offset=0x%x, mask=0x%x) in class 0x%x at word %u",
					 offset, mask, fw.class, fw.pos-1);
			break;
		case HOST1X_OPCODE_IMM:
			/* IMM cannot reasonably be used to write a pointer */
			offset = (word >> 16) & 0xfff;
			err = fw_check_regs_imm(&fw, offset);
			if (err)
				dev_warn(client->base.dev,
					 "illegal IMM(offset=0x%x) in class 0x%x at word %u",
					 offset, fw.class, fw.pos-1);
			break;
		case HOST1X_OPCODE_SETPYLD:
			/* sets the implicit count for INCR_W/NONINCR_W */
			payload = word & 0xffff;
			payload_valid = true;
			break;
		case HOST1X_OPCODE_INCR_W:
			if (!payload_valid)
				return -EINVAL;

			offset = word & 0x3fffff;
			err = fw_check_regs_seq(&fw, offset, payload, true);
			if (err)
				dev_warn(client->base.dev,
					 "illegal INCR_W(offset=0x%x) in class 0x%x at word %u",
					 offset, fw.class, fw.pos-1);
			break;
		case HOST1X_OPCODE_NONINCR_W:
			if (!payload_valid)
				return -EINVAL;

			offset = word & 0x3fffff;
			err = fw_check_regs_seq(&fw, offset, payload, false);
			if (err)
				/* fixed: this diagnostic previously said NONINCR */
				dev_warn(client->base.dev,
					 "illegal NONINCR_W(offset=0x%x) in class 0x%x at word %u",
					 offset, fw.class, fw.pos-1);
			break;
		default:
			dev_warn(client->base.dev, "illegal opcode at word %u",
				 fw.pos-1);
			return -EINVAL;
		}

		if (err)
			return err;
	}

	return 0;
}
+13
drivers/gpu/drm/tegra/gem.c
··· 707 707 708 708 return &bo->gem; 709 709 } 710 + 711 + struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle) 712 + { 713 + struct drm_gem_object *gem; 714 + struct tegra_bo *bo; 715 + 716 + gem = drm_gem_object_lookup(file, handle); 717 + if (!gem) 718 + return NULL; 719 + 720 + bo = to_tegra_bo(gem); 721 + return &bo->base; 722 + }
+2
drivers/gpu/drm/tegra/gem.h
··· 80 80 struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, 81 81 struct dma_buf *buf); 82 82 83 + struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle); 84 + 83 85 #endif
+117
drivers/gpu/drm/tegra/plane.c
···
 */

#include <linux/iommu.h>
#include <linux/interconnect.h>

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
···
	copy->reflect_x = state->reflect_x;
	copy->reflect_y = state->reflect_y;
	copy->opaque = state->opaque;
	/* carry the ICC bandwidth figures across atomic state duplication */
	copy->total_peak_memory_bandwidth = state->total_peak_memory_bandwidth;
	copy->peak_memory_bandwidth = state->peak_memory_bandwidth;
	copy->avg_memory_bandwidth = state->avg_memory_bandwidth;

	for (i = 0; i < 2; i++)
		copy->blending[i] = state->blending[i];
···
	tegra_dc_unpin(dc, to_tegra_plane_state(state));
}

/*
 * Estimate the plane's average and peak memory bandwidth for the new
 * atomic state and store them (in ICC units, i.e. kbytes/sec) in the
 * tegra_plane_state. Invisible planes consume nothing and succeed
 * trivially.
 */
static int tegra_plane_calculate_memory_bandwidth(struct drm_plane_state *state)
{
	struct tegra_plane_state *tegra_state = to_tegra_plane_state(state);
	unsigned int i, bpp, dst_w, dst_h, src_w, src_h, mul;
	const struct tegra_dc_soc_info *soc;
	const struct drm_format_info *fmt;
	struct drm_crtc_state *crtc_state;
	u64 avg_bandwidth, peak_bandwidth;

	if (!state->visible)
		return 0;

	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
	if (!crtc_state)
		return -EINVAL;

	/* src rectangle is in 16.16 fixed point, dst is in whole pixels */
	src_w = drm_rect_width(&state->src) >> 16;
	src_h = drm_rect_height(&state->src) >> 16;
	dst_w = drm_rect_width(&state->dst);
	dst_h = drm_rect_height(&state->dst);

	fmt = state->fb->format;
	soc = to_tegra_dc(state->crtc)->soc;

	/*
	 * Note that real memory bandwidth vary depending on format and
	 * memory layout, we are not taking that into account because small
	 * estimation error isn't important since bandwidth is rounded up
	 * anyway.
	 */
	for (i = 0, bpp = 0; i < fmt->num_planes; i++) {
		unsigned int bpp_plane = fmt->cpp[i] * 8;

		/*
		 * Sub-sampling is relevant for chroma planes only and vertical
		 * readouts are not cached, hence only horizontal sub-sampling
		 * matters.
		 */
		if (i > 0)
			bpp_plane /= fmt->hsub;

		bpp += bpp_plane;
	}

	/* average bandwidth in kbytes/sec */
	avg_bandwidth  = min(src_w, dst_w) * min(src_h, dst_h);
	avg_bandwidth *= drm_mode_vrefresh(&crtc_state->adjusted_mode);
	avg_bandwidth  = DIV_ROUND_UP(avg_bandwidth * bpp, 8) + 999;
	do_div(avg_bandwidth, 1000);

	/* mode.clock in kHz, peak bandwidth in kbytes/sec */
	peak_bandwidth = DIV_ROUND_UP(crtc_state->adjusted_mode.clock * bpp, 8);

	/*
	 * Tegra30/114 Memory Controller can't interleave DC memory requests
	 * for the tiled windows because DC uses 16-bytes atom, while DDR3
	 * uses 32-bytes atom. Hence there is x2 memory overfetch for tiled
	 * framebuffer and DDR3 on these SoCs.
	 */
	if (soc->plane_tiled_memory_bandwidth_x2 &&
	    tegra_state->tiling.mode == TEGRA_BO_TILING_MODE_TILED)
		mul = 2;
	else
		mul = 1;

	/* ICC bandwidth in kbytes/sec */
	tegra_state->peak_memory_bandwidth = kBps_to_icc(peak_bandwidth) * mul;
	tegra_state->avg_memory_bandwidth  = kBps_to_icc(avg_bandwidth)  * mul;

	return 0;
}

int tegra_plane_state_add(struct tegra_plane *plane,
			  struct drm_plane_state *state)
{
···
	/* Check plane state for visibility and calculate clipping bounds */
	err = drm_atomic_helper_check_plane_state(state, crtc_state,
						  0, INT_MAX, true, true);
	if (err < 0)
		return err;

	/* clipping bounds must be known before bandwidth can be estimated */
	err = tegra_plane_calculate_memory_bandwidth(state);
	if (err < 0)
		return err;

···
	err = tegra_plane_setup_transparency(tegra, state);
	if (err < 0)
		return err;

	return 0;
}

/* ICC path names per legacy plane index; NULL entries have no path */
static const char * const tegra_plane_icc_names[TEGRA_DC_LEGACY_PLANES_NUM] = {
	"wina", "winb", "winc", NULL, NULL, NULL, "cursor",
};

/*
 * Look up the interconnect path(s) used by this plane's memory client so
 * bandwidth requests can be made later. Returns 0 on success or a
 * negative error code (logged via dev_err_probe).
 */
int tegra_plane_interconnect_init(struct tegra_plane *plane)
{
	/*
	 * NOTE(review): the array is indexed here *before* the bounds
	 * WARN_ON below; an out-of-range plane->index would read past the
	 * array. Consider moving this lookup after the check.
	 */
	const char *icc_name = tegra_plane_icc_names[plane->index];
	struct device *dev = plane->dc->dev;
	struct tegra_dc *dc = plane->dc;
	int err;

	if (WARN_ON(plane->index >= TEGRA_DC_LEGACY_PLANES_NUM) ||
	    WARN_ON(!tegra_plane_icc_names[plane->index]))
		return -EINVAL;

	plane->icc_mem = devm_of_icc_get(dev, icc_name);
	err = PTR_ERR_OR_ZERO(plane->icc_mem);
	if (err) {
		dev_err_probe(dev, err, "failed to get %s interconnect\n",
			      icc_name);
		return err;
	}

	/* plane B on T20/30 has a dedicated memory client for a 6-tap vertical filter */
	if (plane->index == 1 && dc->soc->has_win_b_vfilter_mem_client) {
		plane->icc_mem_vfilter = devm_of_icc_get(dev, "winb-vfilter");
		err = PTR_ERR_OR_ZERO(plane->icc_mem_vfilter);
		if (err) {
			dev_err_probe(dev, err, "failed to get %s interconnect\n",
				      "winb-vfilter");
			return err;
		}
	}

	return 0;
}
+16
drivers/gpu/drm/tegra/plane.h
··· 8 8 9 9 #include <drm/drm_plane.h> 10 10 11 + struct icc_path; 11 12 struct tegra_bo; 12 13 struct tegra_dc; 13 14 ··· 17 16 struct tegra_dc *dc; 18 17 unsigned int offset; 19 18 unsigned int index; 19 + 20 + struct icc_path *icc_mem; 21 + struct icc_path *icc_mem_vfilter; 20 22 }; 21 23 22 24 struct tegra_cursor { ··· 56 52 /* used for legacy blending support only */ 57 53 struct tegra_plane_legacy_blending_state blending[2]; 58 54 bool opaque; 55 + 56 + /* bandwidths are in ICC units, i.e. kbytes/sec */ 57 + u32 total_peak_memory_bandwidth; 58 + u32 peak_memory_bandwidth; 59 + u32 avg_memory_bandwidth; 59 60 }; 60 61 61 62 static inline struct tegra_plane_state * ··· 70 61 return container_of(state, struct tegra_plane_state, base); 71 62 72 63 return NULL; 64 + } 65 + 66 + static inline const struct tegra_plane_state * 67 + to_const_tegra_plane_state(const struct drm_plane_state *state) 68 + { 69 + return to_tegra_plane_state((struct drm_plane_state *)state); 73 70 } 74 71 75 72 extern const struct drm_plane_funcs tegra_plane_funcs; ··· 93 78 bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc); 94 79 int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, 95 80 struct tegra_plane_state *state); 81 + int tegra_plane_interconnect_init(struct tegra_plane *plane); 96 82 97 83 #endif /* TEGRA_PLANE_H */
+625
drivers/gpu/drm/tegra/submit.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 NVIDIA Corporation */ 3 + 4 + #include <linux/dma-fence-array.h> 5 + #include <linux/dma-mapping.h> 6 + #include <linux/file.h> 7 + #include <linux/host1x.h> 8 + #include <linux/iommu.h> 9 + #include <linux/kref.h> 10 + #include <linux/list.h> 11 + #include <linux/nospec.h> 12 + #include <linux/pm_runtime.h> 13 + #include <linux/scatterlist.h> 14 + #include <linux/slab.h> 15 + #include <linux/sync_file.h> 16 + 17 + #include <drm/drm_drv.h> 18 + #include <drm/drm_file.h> 19 + #include <drm/drm_syncobj.h> 20 + 21 + #include "drm.h" 22 + #include "gem.h" 23 + #include "submit.h" 24 + #include "uapi.h" 25 + 26 + #define SUBMIT_ERR(context, fmt, ...) \ 27 + dev_err_ratelimited(context->client->base.dev, \ 28 + "%s: job submission failed: " fmt "\n", \ 29 + current->comm, ##__VA_ARGS__) 30 + 31 + struct gather_bo { 32 + struct host1x_bo base; 33 + 34 + struct kref ref; 35 + 36 + struct device *dev; 37 + u32 *gather_data; 38 + dma_addr_t gather_data_dma; 39 + size_t gather_data_words; 40 + }; 41 + 42 + static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo) 43 + { 44 + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); 45 + 46 + kref_get(&bo->ref); 47 + 48 + return host_bo; 49 + } 50 + 51 + static void gather_bo_release(struct kref *ref) 52 + { 53 + struct gather_bo *bo = container_of(ref, struct gather_bo, ref); 54 + 55 + dma_free_attrs(bo->dev, bo->gather_data_words * 4, bo->gather_data, bo->gather_data_dma, 56 + 0); 57 + kfree(bo); 58 + } 59 + 60 + static void gather_bo_put(struct host1x_bo *host_bo) 61 + { 62 + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); 63 + 64 + kref_put(&bo->ref, gather_bo_release); 65 + } 66 + 67 + static struct sg_table * 68 + gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) 69 + { 70 + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); 71 + struct sg_table *sgt; 72 + 
int err; 73 + 74 + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); 75 + if (!sgt) 76 + return ERR_PTR(-ENOMEM); 77 + 78 + err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma, 79 + bo->gather_data_words * 4); 80 + if (err) { 81 + kfree(sgt); 82 + return ERR_PTR(err); 83 + } 84 + 85 + return sgt; 86 + } 87 + 88 + static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) 89 + { 90 + if (sgt) { 91 + sg_free_table(sgt); 92 + kfree(sgt); 93 + } 94 + } 95 + 96 + static void *gather_bo_mmap(struct host1x_bo *host_bo) 97 + { 98 + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); 99 + 100 + return bo->gather_data; 101 + } 102 + 103 + static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr) 104 + { 105 + } 106 + 107 + const struct host1x_bo_ops gather_bo_ops = { 108 + .get = gather_bo_get, 109 + .put = gather_bo_put, 110 + .pin = gather_bo_pin, 111 + .unpin = gather_bo_unpin, 112 + .mmap = gather_bo_mmap, 113 + .munmap = gather_bo_munmap, 114 + }; 115 + 116 + static struct tegra_drm_mapping * 117 + tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id) 118 + { 119 + struct tegra_drm_mapping *mapping; 120 + 121 + xa_lock(&context->mappings); 122 + 123 + mapping = xa_load(&context->mappings, id); 124 + if (mapping) 125 + kref_get(&mapping->ref); 126 + 127 + xa_unlock(&context->mappings); 128 + 129 + return mapping; 130 + } 131 + 132 + static void *alloc_copy_user_array(void __user *from, size_t count, size_t size) 133 + { 134 + size_t copy_len; 135 + void *data; 136 + 137 + if (check_mul_overflow(count, size, &copy_len)) 138 + return ERR_PTR(-EINVAL); 139 + 140 + if (copy_len > 0x4000) 141 + return ERR_PTR(-E2BIG); 142 + 143 + data = kvmalloc(copy_len, GFP_KERNEL); 144 + if (!data) 145 + return ERR_PTR(-ENOMEM); 146 + 147 + if (copy_from_user(data, from, copy_len)) { 148 + kvfree(data); 149 + return ERR_PTR(-EFAULT); 150 + } 151 + 152 + return data; 153 + } 154 + 155 + static int submit_copy_gather_data(struct 
gather_bo **pbo, struct device *dev, 156 + struct tegra_drm_context *context, 157 + struct drm_tegra_channel_submit *args) 158 + { 159 + struct gather_bo *bo; 160 + size_t copy_len; 161 + 162 + if (args->gather_data_words == 0) { 163 + SUBMIT_ERR(context, "gather_data_words cannot be zero"); 164 + return -EINVAL; 165 + } 166 + 167 + if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len)) { 168 + SUBMIT_ERR(context, "gather_data_words is too large"); 169 + return -EINVAL; 170 + } 171 + 172 + bo = kzalloc(sizeof(*bo), GFP_KERNEL); 173 + if (!bo) { 174 + SUBMIT_ERR(context, "failed to allocate memory for bo info"); 175 + return -ENOMEM; 176 + } 177 + 178 + host1x_bo_init(&bo->base, &gather_bo_ops); 179 + kref_init(&bo->ref); 180 + bo->dev = dev; 181 + 182 + bo->gather_data = dma_alloc_attrs(dev, copy_len, &bo->gather_data_dma, 183 + GFP_KERNEL | __GFP_NOWARN, 0); 184 + if (!bo->gather_data) { 185 + SUBMIT_ERR(context, "failed to allocate memory for gather data"); 186 + kfree(bo); 187 + return -ENOMEM; 188 + } 189 + 190 + if (copy_from_user(bo->gather_data, u64_to_user_ptr(args->gather_data_ptr), copy_len)) { 191 + SUBMIT_ERR(context, "failed to copy gather data from userspace"); 192 + dma_free_attrs(dev, copy_len, bo->gather_data, bo->gather_data_dma, 0); 193 + kfree(bo); 194 + return -EFAULT; 195 + } 196 + 197 + bo->gather_data_words = args->gather_data_words; 198 + 199 + *pbo = bo; 200 + 201 + return 0; 202 + } 203 + 204 + static int submit_write_reloc(struct tegra_drm_context *context, struct gather_bo *bo, 205 + struct drm_tegra_submit_buf *buf, struct tegra_drm_mapping *mapping) 206 + { 207 + /* TODO check that target_offset is within bounds */ 208 + dma_addr_t iova = mapping->iova + buf->reloc.target_offset; 209 + u32 written_ptr; 210 + 211 + #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT 212 + if (buf->flags & DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) 213 + iova |= BIT_ULL(39); 214 + #endif 215 + 216 + written_ptr = iova >> buf->reloc.shift; 217 + 218 + 
if (buf->reloc.gather_offset_words >= bo->gather_data_words) { 219 + SUBMIT_ERR(context, 220 + "relocation has too large gather offset (%u vs gather length %zu)", 221 + buf->reloc.gather_offset_words, bo->gather_data_words); 222 + return -EINVAL; 223 + } 224 + 225 + buf->reloc.gather_offset_words = array_index_nospec(buf->reloc.gather_offset_words, 226 + bo->gather_data_words); 227 + 228 + bo->gather_data[buf->reloc.gather_offset_words] = written_ptr; 229 + 230 + return 0; 231 + } 232 + 233 + static int submit_process_bufs(struct tegra_drm_context *context, struct gather_bo *bo, 234 + struct drm_tegra_channel_submit *args, 235 + struct tegra_drm_submit_data *job_data) 236 + { 237 + struct tegra_drm_used_mapping *mappings; 238 + struct drm_tegra_submit_buf *bufs; 239 + int err; 240 + u32 i; 241 + 242 + bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr), args->num_bufs, 243 + sizeof(*bufs)); 244 + if (IS_ERR(bufs)) { 245 + SUBMIT_ERR(context, "failed to copy bufs array from userspace"); 246 + return PTR_ERR(bufs); 247 + } 248 + 249 + mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL); 250 + if (!mappings) { 251 + SUBMIT_ERR(context, "failed to allocate memory for mapping info"); 252 + err = -ENOMEM; 253 + goto done; 254 + } 255 + 256 + for (i = 0; i < args->num_bufs; i++) { 257 + struct drm_tegra_submit_buf *buf = &bufs[i]; 258 + struct tegra_drm_mapping *mapping; 259 + 260 + if (buf->flags & ~DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) { 261 + SUBMIT_ERR(context, "invalid flag specified for buffer"); 262 + err = -EINVAL; 263 + goto drop_refs; 264 + } 265 + 266 + mapping = tegra_drm_mapping_get(context, buf->mapping); 267 + if (!mapping) { 268 + SUBMIT_ERR(context, "invalid mapping ID '%u' for buffer", buf->mapping); 269 + err = -EINVAL; 270 + goto drop_refs; 271 + } 272 + 273 + err = submit_write_reloc(context, bo, buf, mapping); 274 + if (err) { 275 + tegra_drm_mapping_put(mapping); 276 + goto drop_refs; 277 + } 278 + 279 + mappings[i].mapping = 
mapping; 280 + mappings[i].flags = buf->flags; 281 + } 282 + 283 + job_data->used_mappings = mappings; 284 + job_data->num_used_mappings = i; 285 + 286 + err = 0; 287 + 288 + goto done; 289 + 290 + drop_refs: 291 + while (i--) 292 + tegra_drm_mapping_put(mappings[i].mapping); 293 + 294 + kfree(mappings); 295 + job_data->used_mappings = NULL; 296 + 297 + done: 298 + kvfree(bufs); 299 + 300 + return err; 301 + } 302 + 303 + static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_job *job, 304 + struct xarray *syncpoints, struct drm_tegra_channel_submit *args) 305 + { 306 + struct host1x_syncpt *sp; 307 + 308 + if (args->syncpt.flags) { 309 + SUBMIT_ERR(context, "invalid flag specified for syncpt"); 310 + return -EINVAL; 311 + } 312 + 313 + /* Syncpt ref will be dropped on job release */ 314 + sp = xa_load(syncpoints, args->syncpt.id); 315 + if (!sp) { 316 + SUBMIT_ERR(context, "syncpoint specified in syncpt was not allocated"); 317 + return -EINVAL; 318 + } 319 + 320 + job->syncpt = host1x_syncpt_get(sp); 321 + job->syncpt_incrs = args->syncpt.increments; 322 + 323 + return 0; 324 + } 325 + 326 + static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context, 327 + struct drm_tegra_submit_cmd_gather_uptr *cmd, 328 + struct gather_bo *bo, u32 *offset, 329 + struct tegra_drm_submit_data *job_data, 330 + u32 *class) 331 + { 332 + u32 next_offset; 333 + 334 + if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) { 335 + SUBMIT_ERR(context, "non-zero reserved field in GATHER_UPTR command"); 336 + return -EINVAL; 337 + } 338 + 339 + /* Check for maximum gather size */ 340 + if (cmd->words > 16383) { 341 + SUBMIT_ERR(context, "too many words in GATHER_UPTR command"); 342 + return -EINVAL; 343 + } 344 + 345 + if (check_add_overflow(*offset, cmd->words, &next_offset)) { 346 + SUBMIT_ERR(context, "too many total words in job"); 347 + return -EINVAL; 348 + } 349 + 350 + if (next_offset > bo->gather_data_words) { 351 + 
SUBMIT_ERR(context, "GATHER_UPTR command overflows gather data"); 352 + return -EINVAL; 353 + } 354 + 355 + if (tegra_drm_fw_validate(context->client, bo->gather_data, *offset, 356 + cmd->words, job_data, class)) { 357 + SUBMIT_ERR(context, "job was rejected by firewall"); 358 + return -EINVAL; 359 + } 360 + 361 + host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4); 362 + 363 + *offset = next_offset; 364 + 365 + return 0; 366 + } 367 + 368 + static struct host1x_job * 369 + submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo, 370 + struct drm_tegra_channel_submit *args, struct tegra_drm_submit_data *job_data, 371 + struct xarray *syncpoints) 372 + { 373 + struct drm_tegra_submit_cmd *cmds; 374 + u32 i, gather_offset = 0, class; 375 + struct host1x_job *job; 376 + int err; 377 + 378 + /* Set initial class for firewall. */ 379 + class = context->client->base.class; 380 + 381 + cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr), args->num_cmds, 382 + sizeof(*cmds)); 383 + if (IS_ERR(cmds)) { 384 + SUBMIT_ERR(context, "failed to copy cmds array from userspace"); 385 + return ERR_CAST(cmds); 386 + } 387 + 388 + job = host1x_job_alloc(context->channel, args->num_cmds, 0, true); 389 + if (!job) { 390 + SUBMIT_ERR(context, "failed to allocate memory for job"); 391 + job = ERR_PTR(-ENOMEM); 392 + goto done; 393 + } 394 + 395 + err = submit_get_syncpt(context, job, syncpoints, args); 396 + if (err < 0) 397 + goto free_job; 398 + 399 + job->client = &context->client->base; 400 + job->class = context->client->base.class; 401 + job->serialize = true; 402 + 403 + for (i = 0; i < args->num_cmds; i++) { 404 + struct drm_tegra_submit_cmd *cmd = &cmds[i]; 405 + 406 + if (cmd->flags) { 407 + SUBMIT_ERR(context, "unknown flags given for cmd"); 408 + err = -EINVAL; 409 + goto free_job; 410 + } 411 + 412 + if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) { 413 + err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo, 414 + 
&gather_offset, job_data, &class); 415 + if (err) 416 + goto free_job; 417 + } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) { 418 + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { 419 + SUBMIT_ERR(context, "non-zero reserved value"); 420 + err = -EINVAL; 421 + goto free_job; 422 + } 423 + 424 + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, 425 + false, class); 426 + } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) { 427 + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { 428 + SUBMIT_ERR(context, "non-zero reserved value"); 429 + err = -EINVAL; 430 + goto free_job; 431 + } 432 + 433 + if (cmd->wait_syncpt.id != args->syncpt.id) { 434 + SUBMIT_ERR(context, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job"); 435 + err = -EINVAL; 436 + goto free_job; 437 + } 438 + 439 + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, 440 + true, class); 441 + } else { 442 + SUBMIT_ERR(context, "unknown cmd type"); 443 + err = -EINVAL; 444 + goto free_job; 445 + } 446 + } 447 + 448 + if (gather_offset == 0) { 449 + SUBMIT_ERR(context, "job must have at least one gather"); 450 + err = -EINVAL; 451 + goto free_job; 452 + } 453 + 454 + goto done; 455 + 456 + free_job: 457 + host1x_job_put(job); 458 + job = ERR_PTR(err); 459 + 460 + done: 461 + kvfree(cmds); 462 + 463 + return job; 464 + } 465 + 466 + static void release_job(struct host1x_job *job) 467 + { 468 + struct tegra_drm_client *client = container_of(job->client, struct tegra_drm_client, base); 469 + struct tegra_drm_submit_data *job_data = job->user_data; 470 + u32 i; 471 + 472 + for (i = 0; i < job_data->num_used_mappings; i++) 473 + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); 474 + 475 + kfree(job_data->used_mappings); 476 + kfree(job_data); 477 + 478 + if (pm_runtime_enabled(client->base.dev)) 479 + pm_runtime_put_autosuspend(client->base.dev); 480 + } 481 + 482 + int 
tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, 483 + struct drm_file *file) 484 + { 485 + struct tegra_drm_file *fpriv = file->driver_priv; 486 + struct drm_tegra_channel_submit *args = data; 487 + struct tegra_drm_submit_data *job_data; 488 + struct drm_syncobj *syncobj = NULL; 489 + struct tegra_drm_context *context; 490 + struct host1x_job *job; 491 + struct gather_bo *bo; 492 + u32 i; 493 + int err; 494 + 495 + mutex_lock(&fpriv->lock); 496 + 497 + context = xa_load(&fpriv->contexts, args->context); 498 + if (!context) { 499 + mutex_unlock(&fpriv->lock); 500 + pr_err_ratelimited("%s: %s: invalid channel context '%#x'", __func__, 501 + current->comm, args->context); 502 + return -EINVAL; 503 + } 504 + 505 + if (args->syncobj_in) { 506 + struct dma_fence *fence; 507 + 508 + err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence); 509 + if (err) { 510 + SUBMIT_ERR(context, "invalid syncobj_in '%#x'", args->syncobj_in); 511 + goto unlock; 512 + } 513 + 514 + err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000)); 515 + dma_fence_put(fence); 516 + if (err) { 517 + SUBMIT_ERR(context, "wait for syncobj_in timed out"); 518 + goto unlock; 519 + } 520 + } 521 + 522 + if (args->syncobj_out) { 523 + syncobj = drm_syncobj_find(file, args->syncobj_out); 524 + if (!syncobj) { 525 + SUBMIT_ERR(context, "invalid syncobj_out '%#x'", args->syncobj_out); 526 + err = -ENOENT; 527 + goto unlock; 528 + } 529 + } 530 + 531 + /* Allocate gather BO and copy gather words in. */ 532 + err = submit_copy_gather_data(&bo, drm->dev, context, args); 533 + if (err) 534 + goto unlock; 535 + 536 + job_data = kzalloc(sizeof(*job_data), GFP_KERNEL); 537 + if (!job_data) { 538 + SUBMIT_ERR(context, "failed to allocate memory for job data"); 539 + err = -ENOMEM; 540 + goto put_bo; 541 + } 542 + 543 + /* Get data buffer mappings and do relocation patching. 
*/ 544 + err = submit_process_bufs(context, bo, args, job_data); 545 + if (err) 546 + goto free_job_data; 547 + 548 + /* Allocate host1x_job and add gathers and waits to it. */ 549 + job = submit_create_job(context, bo, args, job_data, &fpriv->syncpoints); 550 + if (IS_ERR(job)) { 551 + err = PTR_ERR(job); 552 + goto free_job_data; 553 + } 554 + 555 + /* Map gather data for Host1x. */ 556 + err = host1x_job_pin(job, context->client->base.dev); 557 + if (err) { 558 + SUBMIT_ERR(context, "failed to pin job: %d", err); 559 + goto put_job; 560 + } 561 + 562 + /* Boot engine. */ 563 + if (pm_runtime_enabled(context->client->base.dev)) { 564 + err = pm_runtime_resume_and_get(context->client->base.dev); 565 + if (err < 0) { 566 + SUBMIT_ERR(context, "could not power up engine: %d", err); 567 + goto unpin_job; 568 + } 569 + } 570 + 571 + job->user_data = job_data; 572 + job->release = release_job; 573 + job->timeout = 10000; 574 + 575 + /* 576 + * job_data is now part of job reference counting, so don't release 577 + * it from here. 578 + */ 579 + job_data = NULL; 580 + 581 + /* Submit job to hardware. */ 582 + err = host1x_job_submit(job); 583 + if (err) { 584 + SUBMIT_ERR(context, "host1x job submission failed: %d", err); 585 + goto unpin_job; 586 + } 587 + 588 + /* Return postfences to userspace and add fences to DMA reservations. 
*/ 589 + args->syncpt.value = job->syncpt_end; 590 + 591 + if (syncobj) { 592 + struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end); 593 + if (IS_ERR(fence)) { 594 + err = PTR_ERR(fence); 595 + SUBMIT_ERR(context, "failed to create postfence: %d", err); 596 + } 597 + 598 + drm_syncobj_replace_fence(syncobj, fence); 599 + } 600 + 601 + goto put_job; 602 + 603 + unpin_job: 604 + host1x_job_unpin(job); 605 + put_job: 606 + host1x_job_put(job); 607 + free_job_data: 608 + if (job_data && job_data->used_mappings) { 609 + for (i = 0; i < job_data->num_used_mappings; i++) 610 + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); 611 + 612 + kfree(job_data->used_mappings); 613 + } 614 + 615 + if (job_data) 616 + kfree(job_data); 617 + put_bo: 618 + gather_bo_put(&bo->base); 619 + unlock: 620 + if (syncobj) 621 + drm_syncobj_put(syncobj); 622 + 623 + mutex_unlock(&fpriv->lock); 624 + return err; 625 + }
+21
drivers/gpu/drm/tegra/submit.h
···
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2020 NVIDIA Corporation */

#ifndef _TEGRA_DRM_UAPI_SUBMIT_H
#define _TEGRA_DRM_UAPI_SUBMIT_H

/* One mapping referenced by a job, with the submit-time flags it was given. */
struct tegra_drm_used_mapping {
	struct tegra_drm_mapping *mapping;
	u32 flags;
};

/* Per-job bookkeeping: the set of mappings the job is allowed to access. */
struct tegra_drm_submit_data {
	struct tegra_drm_used_mapping *used_mappings;
	u32 num_used_mappings;
};

/*
 * Firewall entry point: validate @words words of gather @data starting at
 * word @start against the mappings in @submit. @job_class carries the
 * currently selected HW class across gathers. Returns 0 if acceptable.
 */
int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
			  u32 words, struct tegra_drm_submit_data *submit,
			  u32 *job_class);

#endif
+338
drivers/gpu/drm/tegra/uapi.c
···
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 NVIDIA Corporation */

#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/list.h>

#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_utils.h>

#include "drm.h"
#include "uapi.h"

/* Final kref release: undo DMA mapping and pinning, drop the BO reference. */
static void tegra_drm_mapping_release(struct kref *ref)
{
	struct tegra_drm_mapping *mapping =
		container_of(ref, struct tegra_drm_mapping, ref);

	if (mapping->sgt)
		dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction,
				  DMA_ATTR_SKIP_CPU_SYNC);

	host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt);
	host1x_bo_put(mapping->bo);

	kfree(mapping);
}

void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping)
{
	kref_put(&mapping->ref, tegra_drm_mapping_release);
}

/* Drop every mapping of a channel context and release its channel. */
static void tegra_drm_channel_context_close(struct tegra_drm_context *context)
{
	struct tegra_drm_mapping *mapping;
	unsigned long id;

	xa_for_each(&context->mappings, id, mapping)
		tegra_drm_mapping_put(mapping);

	xa_destroy(&context->mappings);

	host1x_channel_put(context->channel);

	kfree(context);
}

/* File-close teardown: destroy all contexts and free all syncpoints. */
void tegra_drm_uapi_close_file(struct tegra_drm_file *file)
{
	struct tegra_drm_context *context;
	struct host1x_syncpt *sp;
	unsigned long id;

	xa_for_each(&file->contexts, id, context)
		tegra_drm_channel_context_close(context);

	xa_for_each(&file->syncpoints, id, sp)
		host1x_syncpt_put(sp);

	xa_destroy(&file->contexts);
	xa_destroy(&file->syncpoints);
}

/* Find the registered engine client for a given host1x class, if any. */
static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, u32 class)
{
	struct tegra_drm_client *client;

	list_for_each_entry(client, &tegra->clients, list)
		if (client->base.class == class)
			return client;

	return NULL;
}

/*
 * DRM_IOCTL_TEGRA_CHANNEL_OPEN: create a channel context for the engine
 * identified by args->host1x_class and report its version/capabilities.
 */
int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_file *file)
{
	struct tegra_drm_file *fpriv = file->driver_priv;
	struct tegra_drm *tegra = drm->dev_private;
	struct drm_tegra_channel_open *args = data;
	struct tegra_drm_client *client = NULL;
	struct tegra_drm_context *context;
	int err;

	if (args->flags)
		return -EINVAL;

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	client = tegra_drm_find_client(tegra, args->host1x_class);
	if (!client) {
		err = -ENODEV;
		goto free;
	}

	if (client->shared_channel) {
		context->channel = host1x_channel_get(client->shared_channel);
	} else {
		context->channel = host1x_channel_request(&client->base);
		if (!context->channel) {
			err = -EBUSY;
			goto free;
		}
	}

	/*
	 * NOTE(review): xa_alloc() publishes the context ID before
	 * ->client and ->mappings are initialized below; confirm no other
	 * ioctl on this file can look the ID up concurrently.
	 */
	err = xa_alloc(&fpriv->contexts, &args->context, context, XA_LIMIT(1, U32_MAX),
		       GFP_KERNEL);
	if (err < 0)
		goto put_channel;

	context->client = client;
	xa_init_flags(&context->mappings, XA_FLAGS_ALLOC1);

	args->version = client->version;
	args->capabilities = 0;

	if (device_get_dma_attr(client->base.dev) == DEV_DMA_COHERENT)
		args->capabilities |= DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT;

	return 0;

put_channel:
	host1x_channel_put(context->channel);
free:
	kfree(context);

	return err;
}

/* DRM_IOCTL_TEGRA_CHANNEL_CLOSE: remove and tear down a channel context. */
int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, struct drm_file *file)
{
	struct tegra_drm_file *fpriv = file->driver_priv;
	struct drm_tegra_channel_close *args = data;
	struct tegra_drm_context *context;

	mutex_lock(&fpriv->lock);

	context = xa_load(&fpriv->contexts, args->context);
	if (!context) {
		mutex_unlock(&fpriv->lock);
		return -EINVAL;
	}

147 + xa_erase(&fpriv->contexts, args->context); 148 + 149 + mutex_unlock(&fpriv->lock); 150 + 151 + tegra_drm_channel_context_close(context); 152 + 153 + return 0; 154 + } 155 + 156 + int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_file *file) 157 + { 158 + struct tegra_drm_file *fpriv = file->driver_priv; 159 + struct drm_tegra_channel_map *args = data; 160 + struct tegra_drm_mapping *mapping; 161 + struct tegra_drm_context *context; 162 + int err = 0; 163 + 164 + if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE) 165 + return -EINVAL; 166 + 167 + mutex_lock(&fpriv->lock); 168 + 169 + context = xa_load(&fpriv->contexts, args->context); 170 + if (!context) { 171 + mutex_unlock(&fpriv->lock); 172 + return -EINVAL; 173 + } 174 + 175 + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); 176 + if (!mapping) { 177 + err = -ENOMEM; 178 + goto unlock; 179 + } 180 + 181 + kref_init(&mapping->ref); 182 + 183 + mapping->dev = context->client->base.dev; 184 + mapping->bo = tegra_gem_lookup(file, args->handle); 185 + if (!mapping->bo) { 186 + err = -EINVAL; 187 + goto unlock; 188 + } 189 + 190 + if (context->client->base.group) { 191 + /* IOMMU domain managed directly using IOMMU API */ 192 + host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova); 193 + } else { 194 + switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { 195 + case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: 196 + mapping->direction = DMA_BIDIRECTIONAL; 197 + break; 198 + 199 + case DRM_TEGRA_CHANNEL_MAP_WRITE: 200 + mapping->direction = DMA_FROM_DEVICE; 201 + break; 202 + 203 + case DRM_TEGRA_CHANNEL_MAP_READ: 204 + mapping->direction = DMA_TO_DEVICE; 205 + break; 206 + 207 + default: 208 + return -EINVAL; 209 + } 210 + 211 + mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL); 212 + if (IS_ERR(mapping->sgt)) { 213 + err = PTR_ERR(mapping->sgt); 214 + goto put_gem; 215 + } 216 + 217 + err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction, 218 + 
DMA_ATTR_SKIP_CPU_SYNC); 219 + if (err) 220 + goto unpin; 221 + 222 + mapping->iova = sg_dma_address(mapping->sgt->sgl); 223 + } 224 + 225 + mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->size; 226 + 227 + err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), 228 + GFP_KERNEL); 229 + if (err < 0) 230 + goto unmap; 231 + 232 + mutex_unlock(&fpriv->lock); 233 + 234 + return 0; 235 + 236 + unmap: 237 + if (mapping->sgt) { 238 + dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, 239 + DMA_ATTR_SKIP_CPU_SYNC); 240 + } 241 + unpin: 242 + host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); 243 + put_gem: 244 + host1x_bo_put(mapping->bo); 245 + kfree(mapping); 246 + unlock: 247 + mutex_unlock(&fpriv->lock); 248 + return err; 249 + } 250 + 251 + int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, struct drm_file *file) 252 + { 253 + struct tegra_drm_file *fpriv = file->driver_priv; 254 + struct drm_tegra_channel_unmap *args = data; 255 + struct tegra_drm_mapping *mapping; 256 + struct tegra_drm_context *context; 257 + 258 + mutex_lock(&fpriv->lock); 259 + 260 + context = xa_load(&fpriv->contexts, args->context); 261 + if (!context) { 262 + mutex_unlock(&fpriv->lock); 263 + return -EINVAL; 264 + } 265 + 266 + mapping = xa_erase(&context->mappings, args->mapping); 267 + 268 + mutex_unlock(&fpriv->lock); 269 + 270 + if (!mapping) 271 + return -EINVAL; 272 + 273 + tegra_drm_mapping_put(mapping); 274 + return 0; 275 + } 276 + 277 + int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, struct drm_file *file) 278 + { 279 + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); 280 + struct tegra_drm_file *fpriv = file->driver_priv; 281 + struct drm_tegra_syncpoint_allocate *args = data; 282 + struct host1x_syncpt *sp; 283 + int err; 284 + 285 + if (args->id) 286 + return -EINVAL; 287 + 288 + sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, 
current->comm); 289 + if (!sp) 290 + return -EBUSY; 291 + 292 + args->id = host1x_syncpt_id(sp); 293 + 294 + err = xa_insert(&fpriv->syncpoints, args->id, sp, GFP_KERNEL); 295 + if (err) { 296 + host1x_syncpt_put(sp); 297 + return err; 298 + } 299 + 300 + return 0; 301 + } 302 + 303 + int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, struct drm_file *file) 304 + { 305 + struct tegra_drm_file *fpriv = file->driver_priv; 306 + struct drm_tegra_syncpoint_allocate *args = data; 307 + struct host1x_syncpt *sp; 308 + 309 + mutex_lock(&fpriv->lock); 310 + sp = xa_erase(&fpriv->syncpoints, args->id); 311 + mutex_unlock(&fpriv->lock); 312 + 313 + if (!sp) 314 + return -EINVAL; 315 + 316 + host1x_syncpt_put(sp); 317 + 318 + return 0; 319 + } 320 + 321 + int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, struct drm_file *file) 322 + { 323 + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); 324 + struct drm_tegra_syncpoint_wait *args = data; 325 + signed long timeout_jiffies; 326 + struct host1x_syncpt *sp; 327 + 328 + if (args->padding != 0) 329 + return -EINVAL; 330 + 331 + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); 332 + if (!sp) 333 + return -EINVAL; 334 + 335 + timeout_jiffies = drm_timeout_abs_to_jiffies(args->timeout_ns); 336 + 337 + return host1x_syncpt_wait(sp, args->threshold, timeout_jiffies, &args->value); 338 + }
+58
drivers/gpu/drm/tegra/uapi.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* Copyright (c) 2020 NVIDIA Corporation */ 3 + 4 + #ifndef _TEGRA_DRM_UAPI_H 5 + #define _TEGRA_DRM_UAPI_H 6 + 7 + #include <linux/dma-mapping.h> 8 + #include <linux/idr.h> 9 + #include <linux/kref.h> 10 + #include <linux/xarray.h> 11 + 12 + #include <drm/drm.h> 13 + 14 + struct drm_file; 15 + struct drm_device; 16 + 17 + struct tegra_drm_file { 18 + /* Legacy UAPI state */ 19 + struct idr legacy_contexts; 20 + struct mutex lock; 21 + 22 + /* New UAPI state */ 23 + struct xarray contexts; 24 + struct xarray syncpoints; 25 + }; 26 + 27 + struct tegra_drm_mapping { 28 + struct kref ref; 29 + 30 + struct device *dev; 31 + struct host1x_bo *bo; 32 + struct sg_table *sgt; 33 + enum dma_data_direction direction; 34 + dma_addr_t iova; 35 + dma_addr_t iova_end; 36 + }; 37 + 38 + int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, 39 + struct drm_file *file); 40 + int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, 41 + struct drm_file *file); 42 + int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, 43 + struct drm_file *file); 44 + int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, 45 + struct drm_file *file); 46 + int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, 47 + struct drm_file *file); 48 + int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, 49 + struct drm_file *file); 50 + int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, 51 + struct drm_file *file); 52 + int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, 53 + struct drm_file *file); 54 + 55 + void tegra_drm_uapi_close_file(struct tegra_drm_file *file); 56 + void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping); 57 + 58 + #endif
+53 -63
drivers/gpu/drm/tegra/vic.c
··· 29 29 30 30 struct vic { 31 31 struct falcon falcon; 32 - bool booted; 33 32 34 33 void __iomem *regs; 35 34 struct tegra_drm_client client; ··· 51 52 writel(value, vic->regs + offset); 52 53 } 53 54 54 - static int vic_runtime_resume(struct device *dev) 55 - { 56 - struct vic *vic = dev_get_drvdata(dev); 57 - int err; 58 - 59 - err = clk_prepare_enable(vic->clk); 60 - if (err < 0) 61 - return err; 62 - 63 - usleep_range(10, 20); 64 - 65 - err = reset_control_deassert(vic->rst); 66 - if (err < 0) 67 - goto disable; 68 - 69 - usleep_range(10, 20); 70 - 71 - return 0; 72 - 73 - disable: 74 - clk_disable_unprepare(vic->clk); 75 - return err; 76 - } 77 - 78 - static int vic_runtime_suspend(struct device *dev) 79 - { 80 - struct vic *vic = dev_get_drvdata(dev); 81 - int err; 82 - 83 - err = reset_control_assert(vic->rst); 84 - if (err < 0) 85 - return err; 86 - 87 - usleep_range(2000, 4000); 88 - 89 - clk_disable_unprepare(vic->clk); 90 - 91 - vic->booted = false; 92 - 93 - return 0; 94 - } 95 - 96 55 static int vic_boot(struct vic *vic) 97 56 { 98 57 #ifdef CONFIG_IOMMU_API ··· 59 102 u32 fce_ucode_size, fce_bin_data_offset; 60 103 void *hdr; 61 104 int err = 0; 62 - 63 - if (vic->booted) 64 - return 0; 65 105 66 106 #ifdef CONFIG_IOMMU_API 67 107 if (vic->config->supports_sid && spec) { ··· 121 167 "failed to set application ID and FCE base\n"); 122 168 return err; 123 169 } 124 - 125 - vic->booted = true; 126 170 127 171 return 0; 128 172 } ··· 275 323 return err; 276 324 } 277 325 326 + 327 + static int vic_runtime_resume(struct device *dev) 328 + { 329 + struct vic *vic = dev_get_drvdata(dev); 330 + int err; 331 + 332 + err = clk_prepare_enable(vic->clk); 333 + if (err < 0) 334 + return err; 335 + 336 + usleep_range(10, 20); 337 + 338 + err = reset_control_deassert(vic->rst); 339 + if (err < 0) 340 + goto disable; 341 + 342 + usleep_range(10, 20); 343 + 344 + err = vic_load_firmware(vic); 345 + if (err < 0) 346 + goto assert; 347 + 348 + err = vic_boot(vic); 
349 + if (err < 0) 350 + goto assert; 351 + 352 + return 0; 353 + 354 + assert: 355 + reset_control_assert(vic->rst); 356 + disable: 357 + clk_disable_unprepare(vic->clk); 358 + return err; 359 + } 360 + 361 + static int vic_runtime_suspend(struct device *dev) 362 + { 363 + struct vic *vic = dev_get_drvdata(dev); 364 + int err; 365 + 366 + err = reset_control_assert(vic->rst); 367 + if (err < 0) 368 + return err; 369 + 370 + usleep_range(2000, 4000); 371 + 372 + clk_disable_unprepare(vic->clk); 373 + 374 + return 0; 375 + } 376 + 278 377 static int vic_open_channel(struct tegra_drm_client *client, 279 378 struct tegra_drm_context *context) 280 379 { ··· 336 333 if (err < 0) 337 334 return err; 338 335 339 - err = vic_load_firmware(vic); 340 - if (err < 0) 341 - goto rpm_put; 342 - 343 - err = vic_boot(vic); 344 - if (err < 0) 345 - goto rpm_put; 346 - 347 336 context->channel = host1x_channel_get(vic->channel); 348 337 if (!context->channel) { 349 - err = -ENOMEM; 350 - goto rpm_put; 338 + pm_runtime_put(vic->dev); 339 + return -ENOMEM; 351 340 } 352 341 353 342 return 0; 354 - 355 - rpm_put: 356 - pm_runtime_put(vic->dev); 357 - return err; 358 343 } 359 344 360 345 static void vic_close_channel(struct tegra_drm_context *context) ··· 350 359 struct vic *vic = to_vic(context->client); 351 360 352 361 host1x_channel_put(context->channel); 353 - 354 362 pm_runtime_put(vic->dev); 355 363 } 356 364
+1
drivers/gpu/host1x/Makefile
··· 9 9 job.o \ 10 10 debug.o \ 11 11 mipi.o \ 12 + fence.o \ 12 13 hw/host1x01.o \ 13 14 hw/host1x02.o \ 14 15 hw/host1x04.o \
+52 -6
drivers/gpu/host1x/cdma.c
··· 312 312 bool signal = false; 313 313 struct host1x_job *job, *n; 314 314 315 - /* If CDMA is stopped, queue is cleared and we can return */ 316 - if (!cdma->running) 317 - return; 318 - 319 315 /* 320 316 * Walk the sync queue, reading the sync point registers as necessary, 321 317 * to consume as many sync queue entries as possible without blocking ··· 320 324 struct host1x_syncpt *sp = job->syncpt; 321 325 322 326 /* Check whether this syncpt has completed, and bail if not */ 323 - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { 327 + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && 328 + !job->cancelled) { 324 329 /* Start timer on next pending syncpt */ 325 330 if (job->timeout) 326 331 cdma_start_timer_locked(cdma, job); ··· 410 413 else 411 414 restart_addr = cdma->last_pos; 412 415 416 + if (!job) 417 + goto resume; 418 + 413 419 /* do CPU increments for the remaining syncpts */ 414 - if (job) { 420 + if (job->syncpt_recovery) { 415 421 dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", 416 422 __func__); 417 423 ··· 433 433 434 434 dev_dbg(dev, "%s: finished sync_queue modification\n", 435 435 __func__); 436 + } else { 437 + struct host1x_job *failed_job = job; 438 + 439 + host1x_job_dump(dev, job); 440 + 441 + host1x_syncpt_set_locked(job->syncpt); 442 + failed_job->cancelled = true; 443 + 444 + list_for_each_entry_continue(job, &cdma->sync_queue, list) { 445 + unsigned int i; 446 + 447 + if (job->syncpt != failed_job->syncpt) 448 + continue; 449 + 450 + for (i = 0; i < job->num_slots; i++) { 451 + unsigned int slot = (job->first_get/8 + i) % 452 + HOST1X_PUSHBUFFER_SLOTS; 453 + u32 *mapped = cdma->push_buffer.mapped; 454 + 455 + /* 456 + * Overwrite opcodes with 0 word writes 457 + * to offset 0xbad. This does nothing but 458 + * has a easily detected signature in debug 459 + * traces. 
460 + */ 461 + mapped[2*slot+0] = 0x1bad0000; 462 + mapped[2*slot+1] = 0x1bad0000; 463 + } 464 + 465 + job->cancelled = true; 466 + } 467 + 468 + wmb(); 469 + 470 + update_cdma_locked(cdma); 436 471 } 437 472 473 + resume: 438 474 /* roll back DMAGET and start up channel again */ 439 475 host1x_hw_cdma_resume(host1x, cdma, restart_addr); 440 476 } ··· 525 489 struct host1x *host1x = cdma_to_host1x(cdma); 526 490 527 491 mutex_lock(&cdma->lock); 492 + 493 + /* 494 + * Check if syncpoint was locked due to previous job timeout. 495 + * This needs to be done within the cdma lock to avoid a race 496 + * with the timeout handler. 497 + */ 498 + if (job->syncpt->locked) { 499 + mutex_unlock(&cdma->lock); 500 + return -EPERM; 501 + } 528 502 529 503 if (job->timeout) { 530 504 /* init state on first submit with timeout value */
+168
drivers/gpu/host1x/fence.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Syncpoint dma_fence implementation 4 + * 5 + * Copyright (c) 2020, NVIDIA Corporation. 6 + */ 7 + 8 + #include <linux/dma-fence.h> 9 + #include <linux/file.h> 10 + #include <linux/fs.h> 11 + #include <linux/slab.h> 12 + #include <linux/sync_file.h> 13 + 14 + #include "fence.h" 15 + #include "intr.h" 16 + #include "syncpt.h" 17 + 18 + DEFINE_SPINLOCK(lock); 19 + 20 + struct host1x_syncpt_fence { 21 + struct dma_fence base; 22 + 23 + atomic_t signaling; 24 + 25 + struct host1x_syncpt *sp; 26 + u32 threshold; 27 + 28 + struct host1x_waitlist *waiter; 29 + void *waiter_ref; 30 + 31 + struct delayed_work timeout_work; 32 + }; 33 + 34 + static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) 35 + { 36 + return "host1x"; 37 + } 38 + 39 + static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) 40 + { 41 + return "syncpoint"; 42 + } 43 + 44 + static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) 45 + { 46 + return container_of(f, struct host1x_syncpt_fence, base); 47 + } 48 + 49 + static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) 50 + { 51 + struct host1x_syncpt_fence *sf = to_host1x_fence(f); 52 + int err; 53 + 54 + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) 55 + return false; 56 + 57 + dma_fence_get(f); 58 + 59 + /* 60 + * The dma_fence framework requires the fence driver to keep a 61 + * reference to any fences for which 'enable_signaling' has been 62 + * called (and that have not been signalled). 63 + * 64 + * We provide a userspace API to create arbitrary syncpoint fences, 65 + * so we cannot normally guarantee that all fences get signalled. 66 + * As such, setup a timeout, so that long-lasting fences will get 67 + * reaped eventually. 
68 + */ 69 + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); 70 + 71 + err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold, 72 + HOST1X_INTR_ACTION_SIGNAL_FENCE, f, 73 + sf->waiter, &sf->waiter_ref); 74 + if (err) { 75 + cancel_delayed_work_sync(&sf->timeout_work); 76 + dma_fence_put(f); 77 + return false; 78 + } 79 + 80 + /* intr framework takes ownership of waiter */ 81 + sf->waiter = NULL; 82 + 83 + /* 84 + * The fence may get signalled at any time after the above call, 85 + * so we need to initialize all state used by signalling 86 + * before it. 87 + */ 88 + 89 + return true; 90 + } 91 + 92 + static void host1x_syncpt_fence_release(struct dma_fence *f) 93 + { 94 + struct host1x_syncpt_fence *sf = to_host1x_fence(f); 95 + 96 + if (sf->waiter) 97 + kfree(sf->waiter); 98 + 99 + dma_fence_free(f); 100 + } 101 + 102 + const struct dma_fence_ops host1x_syncpt_fence_ops = { 103 + .get_driver_name = host1x_syncpt_fence_get_driver_name, 104 + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, 105 + .enable_signaling = host1x_syncpt_fence_enable_signaling, 106 + .release = host1x_syncpt_fence_release, 107 + }; 108 + 109 + void host1x_fence_signal(struct host1x_syncpt_fence *f) 110 + { 111 + if (atomic_xchg(&f->signaling, 1)) 112 + return; 113 + 114 + /* 115 + * Cancel pending timeout work - if it races, it will 116 + * not get 'f->signaling' and return. 
117 + */ 118 + cancel_delayed_work_sync(&f->timeout_work); 119 + 120 + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false); 121 + 122 + dma_fence_signal(&f->base); 123 + dma_fence_put(&f->base); 124 + } 125 + 126 + static void do_fence_timeout(struct work_struct *work) 127 + { 128 + struct delayed_work *dwork = (struct delayed_work *)work; 129 + struct host1x_syncpt_fence *f = 130 + container_of(dwork, struct host1x_syncpt_fence, timeout_work); 131 + 132 + if (atomic_xchg(&f->signaling, 1)) 133 + return; 134 + 135 + /* 136 + * Cancel pending timeout work - if it races, it will 137 + * not get 'f->signaling' and return. 138 + */ 139 + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true); 140 + 141 + dma_fence_set_error(&f->base, -ETIMEDOUT); 142 + dma_fence_signal(&f->base); 143 + dma_fence_put(&f->base); 144 + } 145 + 146 + struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) 147 + { 148 + struct host1x_syncpt_fence *fence; 149 + 150 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 151 + if (!fence) 152 + return ERR_PTR(-ENOMEM); 153 + 154 + fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL); 155 + if (!fence->waiter) 156 + return ERR_PTR(-ENOMEM); 157 + 158 + fence->sp = sp; 159 + fence->threshold = threshold; 160 + 161 + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock, 162 + dma_fence_context_alloc(1), 0); 163 + 164 + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); 165 + 166 + return &fence->base; 167 + } 168 + EXPORT_SYMBOL(host1x_fence_create);
+13
drivers/gpu/host1x/fence.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2020, NVIDIA Corporation. 4 + */ 5 + 6 + #ifndef HOST1X_FENCE_H 7 + #define HOST1X_FENCE_H 8 + 9 + struct host1x_syncpt_fence; 10 + 11 + void host1x_fence_signal(struct host1x_syncpt_fence *fence); 12 + 13 + #endif
+70 -25
drivers/gpu/host1x/hw/channel_hw.c
··· 47 47 } 48 48 } 49 49 50 - static void submit_gathers(struct host1x_job *job) 50 + static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, 51 + u32 next_class) 52 + { 53 + #if HOST1X_HW >= 2 54 + host1x_cdma_push_wide(cdma, 55 + host1x_opcode_setclass( 56 + HOST1X_CLASS_HOST1X, 57 + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, 58 + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ 59 + BIT(0) | BIT(2) 60 + ), 61 + threshold, 62 + id, 63 + host1x_opcode_setclass(next_class, 0, 0) 64 + ); 65 + #else 66 + /* TODO add waitchk or use waitbases or other mitigation */ 67 + host1x_cdma_push(cdma, 68 + host1x_opcode_setclass( 69 + HOST1X_CLASS_HOST1X, 70 + host1x_uclass_wait_syncpt_r(), 71 + BIT(0) 72 + ), 73 + host1x_class_host_wait_syncpt(id, threshold) 74 + ); 75 + host1x_cdma_push(cdma, 76 + host1x_opcode_setclass(next_class, 0, 0), 77 + HOST1X_OPCODE_NOP 78 + ); 79 + #endif 80 + } 81 + 82 + static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) 51 83 { 52 84 struct host1x_cdma *cdma = &job->channel->cdma; 53 85 #if HOST1X_HW < 6 54 86 struct device *dev = job->channel->dev; 55 87 #endif 56 88 unsigned int i; 89 + u32 threshold; 57 90 58 - for (i = 0; i < job->num_gathers; i++) { 59 - struct host1x_job_gather *g = &job->gathers[i]; 60 - dma_addr_t addr = g->base + g->offset; 61 - u32 op2, op3; 91 + for (i = 0; i < job->num_cmds; i++) { 92 + struct host1x_job_cmd *cmd = &job->cmds[i]; 62 93 63 - op2 = lower_32_bits(addr); 64 - op3 = upper_32_bits(addr); 94 + if (cmd->is_wait) { 95 + if (cmd->wait.relative) 96 + threshold = job_syncpt_base + cmd->wait.threshold; 97 + else 98 + threshold = cmd->wait.threshold; 65 99 66 - trace_write_gather(cdma, g->bo, g->offset, g->words); 67 - 68 - if (op3 != 0) { 69 - #if HOST1X_HW >= 6 70 - u32 op1 = host1x_opcode_gather_wide(g->words); 71 - u32 op4 = HOST1X_OPCODE_NOP; 72 - 73 - host1x_cdma_push_wide(cdma, op1, op2, op3, op4); 74 - #else 75 - dev_err(dev, "invalid gather for push buffer %pad\n", 76 - 
&addr); 77 - continue; 78 - #endif 100 + submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); 79 101 } else { 80 - u32 op1 = host1x_opcode_gather(g->words); 102 + struct host1x_job_gather *g = &cmd->gather; 81 103 82 - host1x_cdma_push(cdma, op1, op2); 104 + dma_addr_t addr = g->base + g->offset; 105 + u32 op2, op3; 106 + 107 + op2 = lower_32_bits(addr); 108 + op3 = upper_32_bits(addr); 109 + 110 + trace_write_gather(cdma, g->bo, g->offset, g->words); 111 + 112 + if (op3 != 0) { 113 + #if HOST1X_HW >= 6 114 + u32 op1 = host1x_opcode_gather_wide(g->words); 115 + u32 op4 = HOST1X_OPCODE_NOP; 116 + 117 + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); 118 + #else 119 + dev_err(dev, "invalid gather for push buffer %pad\n", 120 + &addr); 121 + continue; 122 + #endif 123 + } else { 124 + u32 op1 = host1x_opcode_gather(g->words); 125 + 126 + host1x_cdma_push(cdma, op1, op2); 127 + } 83 128 } 84 129 } 85 130 } ··· 171 126 struct host1x *host = dev_get_drvdata(ch->dev->parent); 172 127 173 128 trace_host1x_channel_submit(dev_name(ch->dev), 174 - job->num_gathers, job->num_relocs, 129 + job->num_cmds, job->num_relocs, 175 130 job->syncpt->id, job->syncpt_incrs); 176 131 177 132 /* before error checks, return current max */ ··· 226 181 host1x_opcode_setclass(job->class, 0, 0), 227 182 HOST1X_OPCODE_NOP); 228 183 229 - submit_gathers(job); 184 + submit_gathers(job, syncval - user_syncpt_incrs); 230 185 231 186 /* end CDMA submit & stash pinned hMems into sync queue */ 232 187 host1x_cdma_end(&ch->cdma, job); ··· 236 191 /* schedule a submit complete interrupt */ 237 192 err = host1x_intr_add_action(host, sp, syncval, 238 193 HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, 239 - completed_waiter, NULL); 194 + completed_waiter, &job->waiter); 240 195 completed_waiter = NULL; 241 196 WARN(err, "Failed to set submit complete interrupt"); 242 197
+17 -15
drivers/gpu/host1x/hw/debug_hw.c
··· 156 156 } 157 157 } 158 158 159 - static void show_gather(struct output *o, phys_addr_t phys_addr, 159 + static void show_gather(struct output *o, dma_addr_t phys_addr, 160 160 unsigned int words, struct host1x_cdma *cdma, 161 - phys_addr_t pin_addr, u32 *map_addr) 161 + dma_addr_t pin_addr, u32 *map_addr) 162 162 { 163 163 /* Map dmaget cursor to corresponding mem handle */ 164 164 u32 offset = phys_addr - pin_addr; ··· 176 176 } 177 177 178 178 for (i = 0; i < words; i++) { 179 - u32 addr = phys_addr + i * 4; 179 + dma_addr_t addr = phys_addr + i * 4; 180 180 u32 val = *(map_addr + offset / 4 + i); 181 181 182 182 if (!data_count) { 183 - host1x_debug_output(o, "%08x: %08x: ", addr, val); 183 + host1x_debug_output(o, " %pad: %08x: ", &addr, val); 184 184 data_count = show_channel_command(o, val, &payload); 185 185 } else { 186 186 host1x_debug_cont(o, "%08x%s", val, ··· 195 195 struct push_buffer *pb = &cdma->push_buffer; 196 196 struct host1x_job *job; 197 197 198 - host1x_debug_output(o, "PUSHBUF at %pad, %u words\n", 199 - &pb->dma, pb->size / 4); 200 - 201 - show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped); 202 - 203 198 list_for_each_entry(job, &cdma->sync_queue, list) { 204 199 unsigned int i; 205 200 206 - host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", 207 - job, job->syncpt->id, job->syncpt_end, 208 - job->first_get, job->timeout, 201 + host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n", 202 + job->syncpt->id, job->syncpt_end, job->timeout, 209 203 job->num_slots, job->num_unpins); 210 204 211 - for (i = 0; i < job->num_gathers; i++) { 212 - struct host1x_job_gather *g = &job->gathers[i]; 205 + show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma, 206 + pb->dma + job->first_get, pb->mapped + job->first_get); 207 + 208 + for (i = 0; i < job->num_cmds; i++) { 209 + struct host1x_job_gather *g; 213 210 u32 
*mapped; 211 + 212 + if (job->cmds[i].is_wait) 213 + continue; 214 + 215 + g = &job->cmds[i].gather; 214 216 215 217 if (job->gather_copy_mapped) 216 218 mapped = (u32 *)job->gather_copy_mapped; ··· 224 222 continue; 225 223 } 226 224 227 - host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", 225 + host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", 228 226 &g->base, g->offset, g->words); 229 227 230 228 show_gather(o, g->base + g->offset, g->words, cdma,
+6 -2
drivers/gpu/host1x/hw/debug_hw_1x01.c
··· 16 16 struct output *o) 17 17 { 18 18 struct host1x_cdma *cdma = &ch->cdma; 19 + dma_addr_t dmastart, dmaend; 19 20 u32 dmaput, dmaget, dmactrl; 20 21 u32 cbstat, cbread; 21 22 u32 val, base, baseval; 22 23 24 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); 25 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); 23 26 dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); 24 27 dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); 25 28 dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); ··· 59 56 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), 60 57 cbread); 61 58 62 - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", 59 + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); 60 + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", 63 61 dmaput, dmaget, dmactrl); 64 - host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat); 62 + host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat); 65 63 66 64 show_channel_gathers(o, cdma); 67 65 host1x_debug_output(o, "\n");
+15 -1
drivers/gpu/host1x/hw/debug_hw_1x06.c
··· 16 16 struct output *o) 17 17 { 18 18 struct host1x_cdma *cdma = &ch->cdma; 19 + dma_addr_t dmastart = 0, dmaend = 0; 19 20 u32 dmaput, dmaget, dmactrl; 20 21 u32 offset, class; 21 22 u32 ch_stat; 23 + 24 + #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 25 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI); 26 + dmastart <<= 32; 27 + #endif 28 + dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); 29 + 30 + #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 31 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI); 32 + dmaend <<= 32; 33 + #endif 34 + dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); 22 35 23 36 dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); 24 37 dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); ··· 54 41 host1x_debug_output(o, "active class %02x, offset %04x\n", 55 42 class, offset); 56 43 57 - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", 44 + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); 45 + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", 58 46 dmaput, dmaget, dmactrl); 59 47 host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat); 60 48
+12
drivers/gpu/host1x/hw/hw_host1x02_uclass.h
··· 165 165 } 166 166 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ 167 167 host1x_uclass_indoff_indroffset_f(v) 168 + static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) 169 + { 170 + return 0x4e; 171 + } 172 + #define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ 173 + host1x_uclass_load_syncpt_payload_32_r() 174 + static inline u32 host1x_uclass_wait_syncpt_32_r(void) 175 + { 176 + return 0x50; 177 + } 178 + #define HOST1X_UCLASS_WAIT_SYNCPT_32 \ 179 + host1x_uclass_wait_syncpt_32_r() 168 180 169 181 #endif
+12
drivers/gpu/host1x/hw/hw_host1x04_uclass.h
··· 165 165 } 166 166 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ 167 167 host1x_uclass_indoff_indroffset_f(v) 168 + static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) 169 + { 170 + return 0x4e; 171 + } 172 + #define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ 173 + host1x_uclass_load_syncpt_payload_32_r() 174 + static inline u32 host1x_uclass_wait_syncpt_32_r(void) 175 + { 176 + return 0x50; 177 + } 178 + #define HOST1X_UCLASS_WAIT_SYNCPT_32 \ 179 + host1x_uclass_wait_syncpt_32_r() 168 180 169 181 #endif
+12
drivers/gpu/host1x/hw/hw_host1x05_uclass.h
··· 165 165 } 166 166 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ 167 167 host1x_uclass_indoff_indroffset_f(v) 168 + static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) 169 + { 170 + return 0x4e; 171 + } 172 + #define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ 173 + host1x_uclass_load_syncpt_payload_32_r() 174 + static inline u32 host1x_uclass_wait_syncpt_32_r(void) 175 + { 176 + return 0x50; 177 + } 178 + #define HOST1X_UCLASS_WAIT_SYNCPT_32 \ 179 + host1x_uclass_wait_syncpt_32_r() 168 180 169 181 #endif
+12
drivers/gpu/host1x/hw/hw_host1x06_uclass.h
··· 165 165 } 166 166 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ 167 167 host1x_uclass_indoff_indroffset_f(v) 168 + static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) 169 + { 170 + return 0x4e; 171 + } 172 + #define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ 173 + host1x_uclass_load_syncpt_payload_32_r() 174 + static inline u32 host1x_uclass_wait_syncpt_32_r(void) 175 + { 176 + return 0x50; 177 + } 178 + #define HOST1X_UCLASS_WAIT_SYNCPT_32 \ 179 + host1x_uclass_wait_syncpt_32_r() 168 180 169 181 #endif
+12
drivers/gpu/host1x/hw/hw_host1x07_uclass.h
··· 165 165 } 166 166 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ 167 167 host1x_uclass_indoff_indroffset_f(v) 168 + static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) 169 + { 170 + return 0x4e; 171 + } 172 + #define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ 173 + host1x_uclass_load_syncpt_payload_32_r() 174 + static inline u32 host1x_uclass_wait_syncpt_32_r(void) 175 + { 176 + return 0x50; 177 + } 178 + #define HOST1X_UCLASS_WAIT_SYNCPT_32 \ 179 + host1x_uclass_wait_syncpt_32_r() 168 180 169 181 #endif
+9
drivers/gpu/host1x/intr.c
··· 13 13 #include <trace/events/host1x.h> 14 14 #include "channel.h" 15 15 #include "dev.h" 16 + #include "fence.h" 16 17 #include "intr.h" 17 18 18 19 /* Wait list management */ ··· 122 121 wake_up_interruptible(wq); 123 122 } 124 123 124 + static void action_signal_fence(struct host1x_waitlist *waiter) 125 + { 126 + struct host1x_syncpt_fence *f = waiter->data; 127 + 128 + host1x_fence_signal(f); 129 + } 130 + 125 131 typedef void (*action_handler)(struct host1x_waitlist *waiter); 126 132 127 133 static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { 128 134 action_submit_complete, 129 135 action_wakeup, 130 136 action_wakeup_interruptible, 137 + action_signal_fence, 131 138 }; 132 139 133 140 static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+2
drivers/gpu/host1x/intr.h
··· 33 33 */ 34 34 HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, 35 35 36 + HOST1X_INTR_ACTION_SIGNAL_FENCE, 37 + 36 38 HOST1X_INTR_ACTION_COUNT 37 39 }; 38 40
+71 -27
drivers/gpu/host1x/job.c
··· 24 24 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8 25 25 26 26 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, 27 - u32 num_cmdbufs, u32 num_relocs) 27 + u32 num_cmdbufs, u32 num_relocs, 28 + bool skip_firewall) 28 29 { 29 30 struct host1x_job *job = NULL; 30 31 unsigned int num_unpins = num_relocs; 32 + bool enable_firewall; 31 33 u64 total; 32 34 void *mem; 33 35 34 - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) 36 + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; 37 + 38 + if (!enable_firewall) 35 39 num_unpins += num_cmdbufs; 36 40 37 41 /* Check that we're not going to overflow */ 38 42 total = sizeof(struct host1x_job) + 39 43 (u64)num_relocs * sizeof(struct host1x_reloc) + 40 44 (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + 41 - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + 45 + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + 42 46 (u64)num_unpins * sizeof(dma_addr_t) + 43 47 (u64)num_unpins * sizeof(u32 *); 44 48 if (total > ULONG_MAX) ··· 51 47 mem = job = kzalloc(total, GFP_KERNEL); 52 48 if (!job) 53 49 return NULL; 50 + 51 + job->enable_firewall = enable_firewall; 54 52 55 53 kref_init(&job->ref); 56 54 job->channel = ch; ··· 63 57 mem += num_relocs * sizeof(struct host1x_reloc); 64 58 job->unpins = num_unpins ? mem : NULL; 65 59 mem += num_unpins * sizeof(struct host1x_job_unpin_data); 66 - job->gathers = num_cmdbufs ? mem : NULL; 67 - mem += num_cmdbufs * sizeof(struct host1x_job_gather); 60 + job->cmds = num_cmdbufs ? mem : NULL; 61 + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); 68 62 job->addr_phys = num_unpins ? 
mem : NULL; 69 63 70 64 job->reloc_addr_phys = job->addr_phys; ··· 85 79 { 86 80 struct host1x_job *job = container_of(ref, struct host1x_job, ref); 87 81 82 + if (job->release) 83 + job->release(job); 84 + 85 + if (job->waiter) 86 + host1x_intr_put_ref(job->syncpt->host, job->syncpt->id, 87 + job->waiter, false); 88 + 88 89 if (job->syncpt) 89 90 host1x_syncpt_put(job->syncpt); 90 91 ··· 107 94 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, 108 95 unsigned int words, unsigned int offset) 109 96 { 110 - struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; 97 + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; 111 98 112 99 gather->words = words; 113 100 gather->bo = bo; 114 101 gather->offset = offset; 115 102 116 - job->num_gathers++; 103 + job->num_cmds++; 117 104 } 118 105 EXPORT_SYMBOL(host1x_job_add_gather); 106 + 107 + void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, 108 + bool relative, u32 next_class) 109 + { 110 + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; 111 + 112 + cmd->is_wait = true; 113 + cmd->wait.id = id; 114 + cmd->wait.threshold = thresh; 115 + cmd->wait.next_class = next_class; 116 + cmd->wait.relative = relative; 117 + 118 + job->num_cmds++; 119 + } 120 + EXPORT_SYMBOL(host1x_job_add_wait); 119 121 120 122 static unsigned int pin_job(struct host1x *host, struct host1x_job *job) 121 123 { ··· 138 110 struct device *dev = client->dev; 139 111 struct host1x_job_gather *g; 140 112 struct iommu_domain *domain; 113 + struct sg_table *sgt; 141 114 unsigned int i; 142 115 int err; 143 116 ··· 148 119 for (i = 0; i < job->num_relocs; i++) { 149 120 struct host1x_reloc *reloc = &job->relocs[i]; 150 121 dma_addr_t phys_addr, *phys; 151 - struct sg_table *sgt; 152 122 153 123 reloc->target.bo = host1x_bo_get(reloc->target.bo); 154 124 if (!reloc->target.bo) { ··· 220 192 * We will copy gathers BO content later, so there is no need to 221 193 * hold and pin them. 
222 194 */ 223 - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) 195 + if (job->enable_firewall) 224 196 return 0; 225 197 226 - for (i = 0; i < job->num_gathers; i++) { 198 + for (i = 0; i < job->num_cmds; i++) { 227 199 size_t gather_size = 0; 228 200 struct scatterlist *sg; 229 - struct sg_table *sgt; 230 201 dma_addr_t phys_addr; 231 202 unsigned long shift; 232 203 struct iova *alloc; 233 204 dma_addr_t *phys; 234 205 unsigned int j; 235 206 236 - g = &job->gathers[i]; 207 + if (job->cmds[i].is_wait) 208 + continue; 209 + 210 + g = &job->cmds[i].gather; 211 + 237 212 g->bo = host1x_bo_get(g->bo); 238 213 if (!g->bo) { 239 214 err = -EINVAL; ··· 327 296 if (cmdbuf != reloc->cmdbuf.bo) 328 297 continue; 329 298 330 - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { 299 + if (job->enable_firewall) { 331 300 target = (u32 *)job->gather_copy_mapped + 332 301 reloc->cmdbuf.offset / sizeof(u32) + 333 302 g->offset / sizeof(u32); ··· 569 538 fw.num_relocs = job->num_relocs; 570 539 fw.class = job->class; 571 540 572 - for (i = 0; i < job->num_gathers; i++) { 573 - struct host1x_job_gather *g = &job->gathers[i]; 541 + for (i = 0; i < job->num_cmds; i++) { 542 + struct host1x_job_gather *g; 543 + 544 + if (job->cmds[i].is_wait) 545 + continue; 546 + 547 + g = &job->cmds[i].gather; 574 548 575 549 size += g->words * sizeof(u32); 576 550 } ··· 597 561 598 562 job->gather_copy_size = size; 599 563 600 - for (i = 0; i < job->num_gathers; i++) { 601 - struct host1x_job_gather *g = &job->gathers[i]; 564 + for (i = 0; i < job->num_cmds; i++) { 565 + struct host1x_job_gather *g; 602 566 void *gather; 567 + 568 + if (job->cmds[i].is_wait) 569 + continue; 570 + g = &job->cmds[i].gather; 603 571 604 572 /* Copy the gather */ 605 573 gather = host1x_bo_mmap(g->bo); ··· 640 600 if (err) 641 601 goto out; 642 602 643 - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { 603 + if (job->enable_firewall) { 644 604 err = copy_gathers(host->dev, job, dev); 645 605 if (err) 646 606 goto out; 647 
607 } 648 608 649 609 /* patch gathers */ 650 - for (i = 0; i < job->num_gathers; i++) { 651 - struct host1x_job_gather *g = &job->gathers[i]; 610 + for (i = 0; i < job->num_cmds; i++) { 611 + struct host1x_job_gather *g; 612 + 613 + if (job->cmds[i].is_wait) 614 + continue; 615 + g = &job->cmds[i].gather; 652 616 653 617 /* process each gather mem only once */ 654 618 if (g->handled) 655 619 continue; 656 620 657 621 /* copy_gathers() sets gathers base if firewall is enabled */ 658 - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) 622 + if (!job->enable_firewall) 659 623 g->base = job->gather_addr_phys[i]; 660 624 661 - for (j = i + 1; j < job->num_gathers; j++) { 662 - if (job->gathers[j].bo == g->bo) { 663 - job->gathers[j].handled = true; 664 - job->gathers[j].base = g->base; 625 + for (j = i + 1; j < job->num_cmds; j++) { 626 + if (!job->cmds[j].is_wait && 627 + job->cmds[j].gather.bo == g->bo) { 628 + job->cmds[j].gather.handled = true; 629 + job->cmds[j].gather.base = g->base; 665 630 } 666 631 } 667 632 ··· 694 649 struct device *dev = unpin->dev ?: host->dev; 695 650 struct sg_table *sgt = unpin->sgt; 696 651 697 - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && 698 - unpin->size && host->domain) { 652 + if (!job->enable_firewall && unpin->size && host->domain) { 699 653 iommu_unmap(host->domain, job->addr_phys[i], 700 654 unpin->size); 701 655 free_iova(&host->iova,
+16
drivers/gpu/host1x/job.h
··· 18 18 bool handled; 19 19 }; 20 20 21 + struct host1x_job_wait { 22 + u32 id; 23 + u32 threshold; 24 + u32 next_class; 25 + bool relative; 26 + }; 27 + 28 + struct host1x_job_cmd { 29 + bool is_wait; 30 + 31 + union { 32 + struct host1x_job_gather gather; 33 + struct host1x_job_wait wait; 34 + }; 35 + }; 36 + 21 37 struct host1x_job_unpin_data { 22 38 struct host1x_bo *bo; 23 39 struct sg_table *sgt;
+2
drivers/gpu/host1x/syncpt.c
··· 407 407 408 408 atomic_set(&sp->max_val, host1x_syncpt_read(sp)); 409 409 410 + sp->locked = false; 411 + 410 412 mutex_lock(&sp->host->syncpt_mutex); 411 413 412 414 host1x_syncpt_base_free(sp->base);
+12
drivers/gpu/host1x/syncpt.h
··· 40 40 41 41 /* interrupt data */ 42 42 struct host1x_syncpt_intr intr; 43 + 44 + /* 45 + * If a submission incrementing this syncpoint fails, lock it so that 46 + * further submission cannot be made until application has handled the 47 + * failure. 48 + */ 49 + bool locked; 43 50 }; 44 51 45 52 /* Initialize sync point array */ ··· 120 113 static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) 121 114 { 122 115 return sp->id < host1x_syncpt_nb_pts(sp->host); 116 + } 117 + 118 + static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) 119 + { 120 + sp->locked = true; 123 121 } 124 122 125 123 #endif
+24 -3
include/linux/host1x.h
··· 170 170 void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, 171 171 u32 syncpt_id); 172 172 173 + struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); 174 + 173 175 /* 174 176 * host1x channel 175 177 */ ··· 218 216 struct host1x_client *client; 219 217 220 218 /* Gathers and their memory */ 221 - struct host1x_job_gather *gathers; 222 - unsigned int num_gathers; 219 + struct host1x_job_cmd *cmds; 220 + unsigned int num_cmds; 223 221 224 222 /* Array of handles to be pinned & unpinned */ 225 223 struct host1x_reloc *relocs; ··· 236 234 u32 syncpt_incrs; 237 235 u32 syncpt_end; 238 236 237 + /* Completion waiter ref */ 238 + void *waiter; 239 + 239 240 /* Maximum time to wait for this job */ 240 241 unsigned int timeout; 242 + 243 + /* Job has timed out and should be released */ 244 + bool cancelled; 241 245 242 246 /* Index and number of slots used in the push buffer */ 243 247 unsigned int first_get; ··· 265 257 266 258 /* Add a channel wait for previous ops to complete */ 267 259 bool serialize; 260 + 261 + /* Fast-forward syncpoint increments on job timeout */ 262 + bool syncpt_recovery; 263 + 264 + /* Callback called when job is freed */ 265 + void (*release)(struct host1x_job *job); 266 + void *user_data; 267 + 268 + /* Whether host1x-side firewall should be ran for this job or not */ 269 + bool enable_firewall; 268 270 }; 269 271 270 272 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, 271 - u32 num_cmdbufs, u32 num_relocs); 273 + u32 num_cmdbufs, u32 num_relocs, 274 + bool skip_firewall); 272 275 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, 273 276 unsigned int words, unsigned int offset); 277 + void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, 278 + bool relative, u32 next_class); 274 279 struct host1x_job *host1x_job_get(struct host1x_job *job); 275 280 void host1x_job_put(struct host1x_job *job); 276 281 int host1x_job_pin(struct host1x_job 
*job, struct device *dev);
+402 -23
include/uapi/drm/tegra_drm.h
··· 1 - /* 2 - * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. 3 - * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice shall be included in 12 - * all copies or substantial portions of the Software. 13 - * 14 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 - * OTHER DEALINGS IN THE SOFTWARE. 21 - */ 1 + /* SPDX-License-Identifier: MIT */ 2 + /* Copyright (c) 2012-2020 NVIDIA Corporation */ 22 3 23 4 #ifndef _UAPI_TEGRA_DRM_H_ 24 5 #define _UAPI_TEGRA_DRM_H_ ··· 9 28 #if defined(__cplusplus) 10 29 extern "C" { 11 30 #endif 31 + 32 + /* Tegra DRM legacy UAPI. 
Only enabled with STAGING */ 12 33 13 34 #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) 14 35 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) ··· 632 649 #define DRM_TEGRA_SYNCPT_READ 0x02 633 650 #define DRM_TEGRA_SYNCPT_INCR 0x03 634 651 #define DRM_TEGRA_SYNCPT_WAIT 0x04 635 - #define DRM_TEGRA_OPEN_CHANNEL 0x05 636 - #define DRM_TEGRA_CLOSE_CHANNEL 0x06 652 + #define DRM_TEGRA_OPEN_CHANNEL 0x05 653 + #define DRM_TEGRA_CLOSE_CHANNEL 0x06 637 654 #define DRM_TEGRA_GET_SYNCPT 0x07 638 655 #define DRM_TEGRA_SUBMIT 0x08 639 656 #define DRM_TEGRA_GET_SYNCPT_BASE 0x09 ··· 656 673 #define DRM_IOCTL_TEGRA_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_TILING, struct drm_tegra_gem_get_tiling) 657 674 #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags) 658 675 #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags) 676 + 677 + /* New Tegra DRM UAPI */ 678 + 679 + /* 680 + * Reported by the driver in the `capabilities` field. 681 + * 682 + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent 683 + * with regard to the system memory. 684 + */ 685 + #define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) 686 + 687 + struct drm_tegra_channel_open { 688 + /** 689 + * @host1x_class: [in] 690 + * 691 + * Host1x class of the engine that will be programmed using this 692 + * channel. 693 + */ 694 + __u32 host1x_class; 695 + 696 + /** 697 + * @flags: [in] 698 + * 699 + * Flags. 700 + */ 701 + __u32 flags; 702 + 703 + /** 704 + * @context: [out] 705 + * 706 + * Opaque identifier corresponding to the opened channel. 707 + */ 708 + __u32 context; 709 + 710 + /** 711 + * @version: [out] 712 + * 713 + * Version of the engine hardware. This can be used by userspace 714 + * to determine how the engine needs to be programmed. 
715 + */ 716 + __u32 version; 717 + 718 + /** 719 + * @capabilities: [out] 720 + * 721 + * Flags describing the hardware capabilities. 722 + */ 723 + __u32 capabilities; 724 + __u32 padding; 725 + }; 726 + 727 + struct drm_tegra_channel_close { 728 + /** 729 + * @context: [in] 730 + * 731 + * Identifier of the channel to close. 732 + */ 733 + __u32 context; 734 + __u32 padding; 735 + }; 736 + 737 + /* 738 + * Mapping flags that can be used to influence how the mapping is created. 739 + * 740 + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access 741 + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access 742 + */ 743 + #define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) 744 + #define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) 745 + #define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \ 746 + DRM_TEGRA_CHANNEL_MAP_WRITE) 747 + 748 + struct drm_tegra_channel_map { 749 + /** 750 + * @context: [in] 751 + * 752 + * Identifier of the channel to which make memory available for. 753 + */ 754 + __u32 context; 755 + 756 + /** 757 + * @handle: [in] 758 + * 759 + * GEM handle of the memory to map. 760 + */ 761 + __u32 handle; 762 + 763 + /** 764 + * @flags: [in] 765 + * 766 + * Flags. 767 + */ 768 + __u32 flags; 769 + 770 + /** 771 + * @mapping: [out] 772 + * 773 + * Identifier corresponding to the mapping, to be used for 774 + * relocations or unmapping later. 775 + */ 776 + __u32 mapping; 777 + }; 778 + 779 + struct drm_tegra_channel_unmap { 780 + /** 781 + * @context: [in] 782 + * 783 + * Channel identifier of the channel to unmap memory from. 784 + */ 785 + __u32 context; 786 + 787 + /** 788 + * @mapping: [in] 789 + * 790 + * Mapping identifier of the memory mapping to unmap. 
791 + */ 792 + __u32 mapping; 793 + }; 794 + 795 + /* Submission */ 796 + 797 + /** 798 + * Specify that bit 39 of the patched-in address should be set to switch 799 + * swizzling between Tegra and non-Tegra sector layout on systems that store 800 + * surfaces in system memory in non-Tegra sector layout. 801 + */ 802 + #define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) 803 + 804 + struct drm_tegra_submit_buf { 805 + /** 806 + * @mapping: [in] 807 + * 808 + * Identifier of the mapping to use in the submission. 809 + */ 810 + __u32 mapping; 811 + 812 + /** 813 + * @flags: [in] 814 + * 815 + * Flags. 816 + */ 817 + __u32 flags; 818 + 819 + /** 820 + * Information for relocation patching. 821 + */ 822 + struct { 823 + /** 824 + * @target_offset: [in] 825 + * 826 + * Offset from the start of the mapping of the data whose 827 + * address is to be patched into the gather. 828 + */ 829 + __u64 target_offset; 830 + 831 + /** 832 + * @gather_offset_words: [in] 833 + * 834 + * Offset in words from the start of the gather data to 835 + * where the address should be patched into. 836 + */ 837 + __u32 gather_offset_words; 838 + 839 + /** 840 + * @shift: [in] 841 + * 842 + * Number of bits the address should be shifted right before 843 + * patching in. 844 + */ 845 + __u32 shift; 846 + } reloc; 847 + }; 848 + 849 + /** 850 + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` 851 + * buffer. Each GATHER_UPTR command uses successive words from the buffer. 852 + */ 853 + #define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 854 + /** 855 + * Wait for a syncpoint to reach a value before continuing with further 856 + * commands. 857 + */ 858 + #define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 859 + /** 860 + * Wait for a syncpoint to reach a value before continuing with further 861 + * commands. The threshold is calculated relative to the start of the job. 
862 + */ 863 + #define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 864 + 865 + struct drm_tegra_submit_cmd_gather_uptr { 866 + __u32 words; 867 + __u32 reserved[3]; 868 + }; 869 + 870 + struct drm_tegra_submit_cmd_wait_syncpt { 871 + __u32 id; 872 + __u32 value; 873 + __u32 reserved[2]; 874 + }; 875 + 876 + struct drm_tegra_submit_cmd { 877 + /** 878 + * @type: [in] 879 + * 880 + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* 881 + * defines. 882 + */ 883 + __u32 type; 884 + 885 + /** 886 + * @flags: [in] 887 + * 888 + * Flags. 889 + */ 890 + __u32 flags; 891 + 892 + union { 893 + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; 894 + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; 895 + __u32 reserved[4]; 896 + }; 897 + }; 898 + 899 + struct drm_tegra_submit_syncpt { 900 + /** 901 + * @id: [in] 902 + * 903 + * ID of the syncpoint that the job will increment. 904 + */ 905 + __u32 id; 906 + 907 + /** 908 + * @flags: [in] 909 + * 910 + * Flags. 911 + */ 912 + __u32 flags; 913 + 914 + /** 915 + * @increments: [in] 916 + * 917 + * Number of times the job will increment this syncpoint. 918 + */ 919 + __u32 increments; 920 + 921 + /** 922 + * @value: [out] 923 + * 924 + * Value the syncpoint will have once the job has completed all 925 + * its specified syncpoint increments. 926 + * 927 + * Note that the kernel may increment the syncpoint before or after 928 + * the job. These increments are not reflected in this field. 929 + * 930 + * If the job hangs or times out, not all of the increments may 931 + * get executed. 932 + */ 933 + __u32 value; 934 + }; 935 + 936 + struct drm_tegra_channel_submit { 937 + /** 938 + * @context: [in] 939 + * 940 + * Identifier of the channel to submit this job to. 941 + */ 942 + __u32 context; 943 + 944 + /** 945 + * @num_bufs: [in] 946 + * 947 + * Number of elements in the `bufs_ptr` array. 
948 + */ 949 + __u32 num_bufs; 950 + 951 + /** 952 + * @num_cmds: [in] 953 + * 954 + * Number of elements in the `cmds_ptr` array. 955 + */ 956 + __u32 num_cmds; 957 + 958 + /** 959 + * @gather_data_words: [in] 960 + * 961 + * Number of 32-bit words in the `gather_data_ptr` array. 962 + */ 963 + __u32 gather_data_words; 964 + 965 + /** 966 + * @bufs_ptr: [in] 967 + * 968 + * Pointer to an array of drm_tegra_submit_buf structures. 969 + */ 970 + __u64 bufs_ptr; 971 + 972 + /** 973 + * @cmds_ptr: [in] 974 + * 975 + * Pointer to an array of drm_tegra_submit_cmd structures. 976 + */ 977 + __u64 cmds_ptr; 978 + 979 + /** 980 + * @gather_data_ptr: [in] 981 + * 982 + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR 983 + * commands. 984 + */ 985 + __u64 gather_data_ptr; 986 + 987 + /** 988 + * @syncobj_in: [in] 989 + * 990 + * Handle for DRM syncobj that will be waited before submission. 991 + * Ignored if zero. 992 + */ 993 + __u32 syncobj_in; 994 + 995 + /** 996 + * @syncobj_out: [in] 997 + * 998 + * Handle for DRM syncobj that will have its fence replaced with 999 + * the job's completion fence. Ignored if zero. 1000 + */ 1001 + __u32 syncobj_out; 1002 + 1003 + /** 1004 + * @syncpt_incr: [in,out] 1005 + * 1006 + * Information about the syncpoint the job will increment. 1007 + */ 1008 + struct drm_tegra_submit_syncpt syncpt; 1009 + }; 1010 + 1011 + struct drm_tegra_syncpoint_allocate { 1012 + /** 1013 + * @id: [out] 1014 + * 1015 + * ID of allocated syncpoint. 1016 + */ 1017 + __u32 id; 1018 + __u32 padding; 1019 + }; 1020 + 1021 + struct drm_tegra_syncpoint_free { 1022 + /** 1023 + * @id: [in] 1024 + * 1025 + * ID of syncpoint to free. 1026 + */ 1027 + __u32 id; 1028 + __u32 padding; 1029 + }; 1030 + 1031 + struct drm_tegra_syncpoint_wait { 1032 + /** 1033 + * @timeout: [in] 1034 + * 1035 + * Absolute timestamp at which the wait will time out. 
1036 + */ 1037 + __s64 timeout_ns; 1038 + 1039 + /** 1040 + * @id: [in] 1041 + * 1042 + * ID of syncpoint to wait on. 1043 + */ 1044 + __u32 id; 1045 + 1046 + /** 1047 + * @threshold: [in] 1048 + * 1049 + * Threshold to wait for. 1050 + */ 1051 + __u32 threshold; 1052 + 1053 + /** 1054 + * @value: [out] 1055 + * 1056 + * Value of the syncpoint upon wait completion. 1057 + */ 1058 + __u32 value; 1059 + 1060 + __u32 padding; 1061 + }; 1062 + 1063 + #define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) 1064 + #define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) 1065 + #define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) 1066 + #define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) 1067 + #define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) 1068 + 1069 + #define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) 1070 + #define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) 1071 + #define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) 659 1072 660 1073 #if defined(__cplusplus) 661 1074 }