Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux into drm-next

This is the second pull request for 3.15 radeon changes. Highlights this time:
- Better VRAM usage
- VM page table rework
- Enabling different UVD clocks again
- Some general cleanups and improvements

* 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux:
drm/radeon: remove struct radeon_bo_list
drm/radeon: drop non blocking allocations from sub allocator
drm/radeon: remove global vm lock
drm/radeon: use normal BOs for the page tables v4
drm/radeon: further cleanup vm flushing & fencing
drm/radeon: separate gart and vm functions
drm/radeon: fix VCE suspend/resume
drm/radeon: fix missing bo reservation
drm/radeon: limit how much memory TTM can move per IB according to VRAM usage
drm/radeon: validate relocations in the order determined by userspace v3
drm/radeon: add buffers to the LRU list from smallest to largest
drm/radeon: deduplicate code in radeon_gem_busy_ioctl
drm/radeon: track memory statistics about VRAM and GTT usage and buffer moves v2
drm/radeon: add a way to get and set initial buffer domains v2
drm/radeon: use variable UVD clocks
drm/radeon: cleanup the fence ring locking code
drm/radeon: improve ring lockup detection code v2

+1623 -1392
+1 -1
drivers/gpu/drm/radeon/Makefile
··· 80 80 r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ 81 81 rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ 82 82 trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ 83 - ci_dpm.o dce6_afmt.o 83 + ci_dpm.o dce6_afmt.o radeon_vm.o 84 84 85 85 # add async DMA block 86 86 radeon-y += \
+105 -105
drivers/gpu/drm/radeon/evergreen_cs.c
··· 1165 1165 "0x%04X\n", reg); 1166 1166 return -EINVAL; 1167 1167 } 1168 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1168 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1169 1169 break; 1170 1170 case DB_DEPTH_CONTROL: 1171 1171 track->db_depth_control = radeon_get_ib_value(p, idx); ··· 1196 1196 } 1197 1197 ib[idx] &= ~Z_ARRAY_MODE(0xf); 1198 1198 track->db_z_info &= ~Z_ARRAY_MODE(0xf); 1199 - ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1200 - track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1201 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1199 + ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1200 + track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1201 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1202 1202 unsigned bankw, bankh, mtaspect, tile_split; 1203 1203 1204 - evergreen_tiling_fields(reloc->lobj.tiling_flags, 1204 + evergreen_tiling_fields(reloc->tiling_flags, 1205 1205 &bankw, &bankh, &mtaspect, 1206 1206 &tile_split); 1207 1207 ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); ··· 1237 1237 return -EINVAL; 1238 1238 } 1239 1239 track->db_z_read_offset = radeon_get_ib_value(p, idx); 1240 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1240 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1241 1241 track->db_z_read_bo = reloc->robj; 1242 1242 track->db_dirty = true; 1243 1243 break; ··· 1249 1249 return -EINVAL; 1250 1250 } 1251 1251 track->db_z_write_offset = radeon_get_ib_value(p, idx); 1252 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1252 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1253 1253 track->db_z_write_bo = reloc->robj; 1254 1254 track->db_dirty = true; 1255 1255 break; ··· 1261 1261 return -EINVAL; 1262 1262 } 1263 1263 track->db_s_read_offset = radeon_get_ib_value(p, idx); 1264 - 
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1264 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1265 1265 track->db_s_read_bo = reloc->robj; 1266 1266 track->db_dirty = true; 1267 1267 break; ··· 1273 1273 return -EINVAL; 1274 1274 } 1275 1275 track->db_s_write_offset = radeon_get_ib_value(p, idx); 1276 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1276 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1277 1277 track->db_s_write_bo = reloc->robj; 1278 1278 track->db_dirty = true; 1279 1279 break; ··· 1297 1297 } 1298 1298 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; 1299 1299 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; 1300 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1300 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1301 1301 track->vgt_strmout_bo[tmp] = reloc->robj; 1302 1302 track->streamout_dirty = true; 1303 1303 break; ··· 1317 1317 "0x%04X\n", reg); 1318 1318 return -EINVAL; 1319 1319 } 1320 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1320 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1321 1321 case CB_TARGET_MASK: 1322 1322 track->cb_target_mask = radeon_get_ib_value(p, idx); 1323 1323 track->cb_dirty = true; ··· 1381 1381 "0x%04X\n", reg); 1382 1382 return -EINVAL; 1383 1383 } 1384 - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1385 - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1384 + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1385 + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1386 1386 } 1387 1387 track->cb_dirty = true; 1388 1388 break; ··· 1399 1399 "0x%04X\n", reg); 1400 1400 return -EINVAL; 1401 1401 } 1402 - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1403 - track->cb_color_info[tmp] |= 
CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1402 + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1403 + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1404 1404 } 1405 1405 track->cb_dirty = true; 1406 1406 break; ··· 1461 1461 return -EINVAL; 1462 1462 } 1463 1463 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1464 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1464 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1465 1465 unsigned bankw, bankh, mtaspect, tile_split; 1466 1466 1467 - evergreen_tiling_fields(reloc->lobj.tiling_flags, 1467 + evergreen_tiling_fields(reloc->tiling_flags, 1468 1468 &bankw, &bankh, &mtaspect, 1469 1469 &tile_split); 1470 1470 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); ··· 1489 1489 return -EINVAL; 1490 1490 } 1491 1491 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1492 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1492 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1493 1493 unsigned bankw, bankh, mtaspect, tile_split; 1494 1494 1495 - evergreen_tiling_fields(reloc->lobj.tiling_flags, 1495 + evergreen_tiling_fields(reloc->tiling_flags, 1496 1496 &bankw, &bankh, &mtaspect, 1497 1497 &tile_split); 1498 1498 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); ··· 1520 1520 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1521 1521 return -EINVAL; 1522 1522 } 1523 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1523 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1524 1524 track->cb_color_fmask_bo[tmp] = reloc->robj; 1525 1525 break; 1526 1526 case CB_COLOR0_CMASK: ··· 1537 1537 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1538 1538 return -EINVAL; 1539 1539 } 1540 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1540 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1541 1541 
track->cb_color_cmask_bo[tmp] = reloc->robj; 1542 1542 break; 1543 1543 case CB_COLOR0_FMASK_SLICE: ··· 1578 1578 } 1579 1579 tmp = (reg - CB_COLOR0_BASE) / 0x3c; 1580 1580 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1581 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1581 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1582 1582 track->cb_color_bo[tmp] = reloc->robj; 1583 1583 track->cb_dirty = true; 1584 1584 break; ··· 1594 1594 } 1595 1595 tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; 1596 1596 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1597 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1597 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1598 1598 track->cb_color_bo[tmp] = reloc->robj; 1599 1599 track->cb_dirty = true; 1600 1600 break; ··· 1606 1606 return -EINVAL; 1607 1607 } 1608 1608 track->htile_offset = radeon_get_ib_value(p, idx); 1609 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1609 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1610 1610 track->htile_bo = reloc->robj; 1611 1611 track->db_dirty = true; 1612 1612 break; ··· 1723 1723 "0x%04X\n", reg); 1724 1724 return -EINVAL; 1725 1725 } 1726 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1726 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1727 1727 break; 1728 1728 case SX_MEMORY_EXPORT_BASE: 1729 1729 if (p->rdev->family >= CHIP_CAYMAN) { ··· 1737 1737 "0x%04X\n", reg); 1738 1738 return -EINVAL; 1739 1739 } 1740 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1740 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1741 1741 break; 1742 1742 case CAYMAN_SX_SCATTER_EXPORT_BASE: 1743 1743 if (p->rdev->family < CHIP_CAYMAN) { ··· 1751 1751 "0x%04X\n", reg); 1752 1752 return -EINVAL; 1753 1753 } 1754 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1754 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1755 
1755 break; 1756 1756 case SX_MISC: 1757 1757 track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; ··· 1836 1836 return -EINVAL; 1837 1837 } 1838 1838 1839 - offset = reloc->lobj.gpu_offset + 1839 + offset = reloc->gpu_offset + 1840 1840 (idx_value & 0xfffffff0) + 1841 1841 ((u64)(tmp & 0xff) << 32); 1842 1842 ··· 1882 1882 return -EINVAL; 1883 1883 } 1884 1884 1885 - offset = reloc->lobj.gpu_offset + 1885 + offset = reloc->gpu_offset + 1886 1886 idx_value + 1887 1887 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1888 1888 ··· 1909 1909 return -EINVAL; 1910 1910 } 1911 1911 1912 - offset = reloc->lobj.gpu_offset + 1912 + offset = reloc->gpu_offset + 1913 1913 idx_value + 1914 1914 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1915 1915 ··· 1937 1937 return -EINVAL; 1938 1938 } 1939 1939 1940 - offset = reloc->lobj.gpu_offset + 1940 + offset = reloc->gpu_offset + 1941 1941 radeon_get_ib_value(p, idx+1) + 1942 1942 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1943 1943 ··· 2027 2027 DRM_ERROR("bad DISPATCH_INDIRECT\n"); 2028 2028 return -EINVAL; 2029 2029 } 2030 - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); 2030 + ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); 2031 2031 r = evergreen_cs_track_check(p); 2032 2032 if (r) { 2033 2033 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); ··· 2049 2049 return -EINVAL; 2050 2050 } 2051 2051 2052 - offset = reloc->lobj.gpu_offset + 2052 + offset = reloc->gpu_offset + 2053 2053 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2054 2054 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2055 2055 ··· 2106 2106 tmp = radeon_get_ib_value(p, idx) + 2107 2107 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 2108 2108 2109 - offset = reloc->lobj.gpu_offset + tmp; 2109 + offset = reloc->gpu_offset + tmp; 2110 2110 2111 2111 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2112 2112 dev_warn(p->dev, "CP DMA src buffer too small 
(%llu %lu)\n", ··· 2144 2144 tmp = radeon_get_ib_value(p, idx+2) + 2145 2145 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); 2146 2146 2147 - offset = reloc->lobj.gpu_offset + tmp; 2147 + offset = reloc->gpu_offset + tmp; 2148 2148 2149 2149 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2150 2150 dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", ··· 2174 2174 DRM_ERROR("bad SURFACE_SYNC\n"); 2175 2175 return -EINVAL; 2176 2176 } 2177 - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 2177 + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2178 2178 } 2179 2179 break; 2180 2180 case PACKET3_EVENT_WRITE: ··· 2190 2190 DRM_ERROR("bad EVENT_WRITE\n"); 2191 2191 return -EINVAL; 2192 2192 } 2193 - offset = reloc->lobj.gpu_offset + 2193 + offset = reloc->gpu_offset + 2194 2194 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + 2195 2195 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2196 2196 ··· 2212 2212 return -EINVAL; 2213 2213 } 2214 2214 2215 - offset = reloc->lobj.gpu_offset + 2215 + offset = reloc->gpu_offset + 2216 2216 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2217 2217 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2218 2218 ··· 2234 2234 return -EINVAL; 2235 2235 } 2236 2236 2237 - offset = reloc->lobj.gpu_offset + 2237 + offset = reloc->gpu_offset + 2238 2238 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2239 2239 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2240 2240 ··· 2302 2302 } 2303 2303 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 2304 2304 ib[idx+1+(i*8)+1] |= 2305 - TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 2306 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 2305 + TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 2306 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 2307 2307 unsigned bankw, bankh, mtaspect, tile_split; 2308 2308 2309 - evergreen_tiling_fields(reloc->lobj.tiling_flags, 2309 + 
evergreen_tiling_fields(reloc->tiling_flags, 2310 2310 &bankw, &bankh, &mtaspect, 2311 2311 &tile_split); 2312 2312 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); ··· 2318 2318 } 2319 2319 } 2320 2320 texture = reloc->robj; 2321 - toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 2321 + toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2322 2322 2323 2323 /* tex mip base */ 2324 2324 tex_dim = ib[idx+1+(i*8)+0] & 0x7; ··· 2337 2337 DRM_ERROR("bad SET_RESOURCE (tex)\n"); 2338 2338 return -EINVAL; 2339 2339 } 2340 - moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 2340 + moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2341 2341 mipmap = reloc->robj; 2342 2342 } 2343 2343 ··· 2364 2364 ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; 2365 2365 } 2366 2366 2367 - offset64 = reloc->lobj.gpu_offset + offset; 2367 + offset64 = reloc->gpu_offset + offset; 2368 2368 ib[idx+1+(i*8)+0] = offset64; 2369 2369 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | 2370 2370 (upper_32_bits(offset64) & 0xff); ··· 2445 2445 offset + 4, radeon_bo_size(reloc->robj)); 2446 2446 return -EINVAL; 2447 2447 } 2448 - offset += reloc->lobj.gpu_offset; 2448 + offset += reloc->gpu_offset; 2449 2449 ib[idx+1] = offset; 2450 2450 ib[idx+2] = upper_32_bits(offset) & 0xff; 2451 2451 } ··· 2464 2464 offset + 4, radeon_bo_size(reloc->robj)); 2465 2465 return -EINVAL; 2466 2466 } 2467 - offset += reloc->lobj.gpu_offset; 2467 + offset += reloc->gpu_offset; 2468 2468 ib[idx+3] = offset; 2469 2469 ib[idx+4] = upper_32_bits(offset) & 0xff; 2470 2470 } ··· 2493 2493 offset + 8, radeon_bo_size(reloc->robj)); 2494 2494 return -EINVAL; 2495 2495 } 2496 - offset += reloc->lobj.gpu_offset; 2496 + offset += reloc->gpu_offset; 2497 2497 ib[idx+0] = offset; 2498 2498 ib[idx+1] = upper_32_bits(offset) & 0xff; 2499 2499 break; ··· 2518 2518 offset + 4, radeon_bo_size(reloc->robj)); 2519 2519 return -EINVAL; 2520 2520 } 2521 - offset += reloc->lobj.gpu_offset; 2521 + 
offset += reloc->gpu_offset; 2522 2522 ib[idx+1] = offset; 2523 2523 ib[idx+2] = upper_32_bits(offset) & 0xff; 2524 2524 } else { ··· 2542 2542 offset + 4, radeon_bo_size(reloc->robj)); 2543 2543 return -EINVAL; 2544 2544 } 2545 - offset += reloc->lobj.gpu_offset; 2545 + offset += reloc->gpu_offset; 2546 2546 ib[idx+3] = offset; 2547 2547 ib[idx+4] = upper_32_bits(offset) & 0xff; 2548 2548 } else { ··· 2717 2717 dst_offset = radeon_get_ib_value(p, idx+1); 2718 2718 dst_offset <<= 8; 2719 2719 2720 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2720 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2721 2721 p->idx += count + 7; 2722 2722 break; 2723 2723 /* linear */ ··· 2725 2725 dst_offset = radeon_get_ib_value(p, idx+1); 2726 2726 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2727 2727 2728 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2729 - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2728 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2729 + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2730 2730 p->idx += count + 3; 2731 2731 break; 2732 2732 default: ··· 2768 2768 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2769 2769 return -EINVAL; 2770 2770 } 2771 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2772 - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2773 - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2774 - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2771 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2772 + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2773 + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2774 + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2775 2775 p->idx += 5; 2776 2776 break; 2777 2777 /* Copy L2T/T2L */ ··· 2781 2781 /* tiled src, linear dst */ 2782 2782 src_offset = radeon_get_ib_value(p, idx+1); 2783 2783 src_offset <<= 8; 
2784 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 2784 + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2785 2785 2786 2786 dst_offset = radeon_get_ib_value(p, idx + 7); 2787 2787 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2788 - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2789 - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2788 + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2789 + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2790 2790 } else { 2791 2791 /* linear src, tiled dst */ 2792 2792 src_offset = radeon_get_ib_value(p, idx+7); 2793 2793 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2794 - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2795 - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2794 + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2795 + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2796 2796 2797 2797 dst_offset = radeon_get_ib_value(p, idx+1); 2798 2798 dst_offset <<= 8; 2799 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2799 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2800 2800 } 2801 2801 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2802 2802 dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", ··· 2827 2827 dst_offset + count, radeon_bo_size(dst_reloc->robj)); 2828 2828 return -EINVAL; 2829 2829 } 2830 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); 2831 - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); 2832 - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2833 - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2830 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2831 + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2832 + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2833 + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2834 2834 
p->idx += 5; 2835 2835 break; 2836 2836 /* Copy L2L, partial */ ··· 2840 2840 DRM_ERROR("L2L Partial is cayman only !\n"); 2841 2841 return -EINVAL; 2842 2842 } 2843 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); 2844 - ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2845 - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); 2846 - ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2843 + ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2844 + ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2845 + ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2846 + ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2847 2847 2848 2848 p->idx += 9; 2849 2849 break; ··· 2876 2876 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2877 2877 return -EINVAL; 2878 2878 } 2879 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2880 - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc); 2881 - ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2882 - ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2883 - ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff; 2884 - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2879 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2880 + ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); 2881 + ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2882 + ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2883 + ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; 2884 + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2885 2885 p->idx += 7; 2886 2886 break; 2887 2887 /* Copy L2T Frame to Field */ ··· 2916 2916 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2917 2917 return -EINVAL; 2918 2918 } 2919 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2920 - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); 2921 - ib[idx+8] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2922 - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2919 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2920 + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 2921 + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2922 + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2923 2923 p->idx += 10; 2924 2924 break; 2925 2925 /* Copy L2T/T2L, partial */ ··· 2932 2932 /* detile bit */ 2933 2933 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2934 2934 /* tiled src, linear dst */ 2935 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 2935 + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2936 2936 2937 - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2938 - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2937 + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2938 + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2939 2939 } else { 2940 2940 /* linear src, tiled dst */ 2941 - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2942 - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2941 + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2942 + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2943 2943 2944 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2944 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2945 2945 } 2946 2946 p->idx += 12; 2947 2947 break; ··· 2978 2978 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2979 2979 return -EINVAL; 2980 2980 } 2981 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2982 - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); 2983 - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2984 - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2981 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2982 + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 2983 + ib[idx+8] += (u32)(src_reloc->gpu_offset & 
0xfffffffc); 2984 + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2985 2985 p->idx += 10; 2986 2986 break; 2987 2987 /* Copy L2T/T2L (tile units) */ ··· 2992 2992 /* tiled src, linear dst */ 2993 2993 src_offset = radeon_get_ib_value(p, idx+1); 2994 2994 src_offset <<= 8; 2995 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 2995 + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2996 2996 2997 2997 dst_offset = radeon_get_ib_value(p, idx+7); 2998 2998 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2999 - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 3000 - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2999 + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3000 + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 3001 3001 } else { 3002 3002 /* linear src, tiled dst */ 3003 3003 src_offset = radeon_get_ib_value(p, idx+7); 3004 3004 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 3005 - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 3006 - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 3005 + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3006 + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3007 3007 3008 3008 dst_offset = radeon_get_ib_value(p, idx+1); 3009 3009 dst_offset <<= 8; 3010 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 3010 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3011 3011 } 3012 3012 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3013 3013 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", ··· 3028 3028 DRM_ERROR("L2T, T2L Partial is cayman only !\n"); 3029 3029 return -EINVAL; 3030 3030 } 3031 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 3032 - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 3031 + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 3032 + ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); 3033 3033 p->idx += 13; 3034 
3034 break; 3035 3035 /* Copy L2T broadcast (tile units) */ ··· 3065 3065 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3066 3066 return -EINVAL; 3067 3067 } 3068 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 3069 - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); 3070 - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 3071 - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 3068 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3069 + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 3070 + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3071 + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3072 3072 p->idx += 10; 3073 3073 break; 3074 3074 default: ··· 3089 3089 dst_offset, radeon_bo_size(dst_reloc->robj)); 3090 3090 return -EINVAL; 3091 3091 } 3092 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 3093 - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; 3092 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3093 + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; 3094 3094 p->idx += 4; 3095 3095 break; 3096 3096 case DMA_PACKET_NOP:
+20 -20
drivers/gpu/drm/radeon/r100.c
··· 1274 1274 1275 1275 value = radeon_get_ib_value(p, idx); 1276 1276 tmp = value & 0x003fffff; 1277 - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); 1277 + tmp += (((u32)reloc->gpu_offset) >> 10); 1278 1278 1279 1279 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1280 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1280 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 1281 1281 tile_flags |= RADEON_DST_TILE_MACRO; 1282 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 1282 + if (reloc->tiling_flags & RADEON_TILING_MICRO) { 1283 1283 if (reg == RADEON_SRC_PITCH_OFFSET) { 1284 1284 DRM_ERROR("Cannot src blit from microtiled surface\n"); 1285 1285 radeon_cs_dump_packet(p, pkt); ··· 1325 1325 return r; 1326 1326 } 1327 1327 idx_value = radeon_get_ib_value(p, idx); 1328 - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); 1328 + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); 1329 1329 1330 1330 track->arrays[i + 0].esize = idx_value >> 8; 1331 1331 track->arrays[i + 0].robj = reloc->robj; ··· 1337 1337 radeon_cs_dump_packet(p, pkt); 1338 1338 return r; 1339 1339 } 1340 - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); 1340 + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset); 1341 1341 track->arrays[i + 1].robj = reloc->robj; 1342 1342 track->arrays[i + 1].esize = idx_value >> 24; 1343 1343 track->arrays[i + 1].esize &= 0x7F; ··· 1351 1351 return r; 1352 1352 } 1353 1353 idx_value = radeon_get_ib_value(p, idx); 1354 - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); 1354 + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); 1355 1355 track->arrays[i + 0].robj = reloc->robj; 1356 1356 track->arrays[i + 0].esize = idx_value >> 8; 1357 1357 track->arrays[i + 0].esize &= 0x7F; ··· 1594 1594 track->zb.robj = reloc->robj; 1595 1595 track->zb.offset = idx_value; 1596 1596 track->zb_dirty = true; 1597 - ib[idx] = 
idx_value + ((u32)reloc->lobj.gpu_offset); 1597 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1598 1598 break; 1599 1599 case RADEON_RB3D_COLOROFFSET: 1600 1600 r = radeon_cs_packet_next_reloc(p, &reloc, 0); ··· 1607 1607 track->cb[0].robj = reloc->robj; 1608 1608 track->cb[0].offset = idx_value; 1609 1609 track->cb_dirty = true; 1610 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1610 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1611 1611 break; 1612 1612 case RADEON_PP_TXOFFSET_0: 1613 1613 case RADEON_PP_TXOFFSET_1: ··· 1621 1621 return r; 1622 1622 } 1623 1623 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1624 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1624 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 1625 1625 tile_flags |= RADEON_TXO_MACRO_TILE; 1626 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1626 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 1627 1627 tile_flags |= RADEON_TXO_MICRO_TILE_X2; 1628 1628 1629 1629 tmp = idx_value & ~(0x7 << 2); 1630 1630 tmp |= tile_flags; 1631 - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); 1631 + ib[idx] = tmp + ((u32)reloc->gpu_offset); 1632 1632 } else 1633 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1633 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1634 1634 track->textures[i].robj = reloc->robj; 1635 1635 track->tex_dirty = true; 1636 1636 break; ··· 1648 1648 return r; 1649 1649 } 1650 1650 track->textures[0].cube_info[i].offset = idx_value; 1651 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1651 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1652 1652 track->textures[0].cube_info[i].robj = reloc->robj; 1653 1653 track->tex_dirty = true; 1654 1654 break; ··· 1666 1666 return r; 1667 1667 } 1668 1668 track->textures[1].cube_info[i].offset = idx_value; 1669 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1669 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1670 1670 track->textures[1].cube_info[i].robj = reloc->robj; 1671 1671 
track->tex_dirty = true; 1672 1672 break; ··· 1684 1684 return r; 1685 1685 } 1686 1686 track->textures[2].cube_info[i].offset = idx_value; 1687 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1687 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1688 1688 track->textures[2].cube_info[i].robj = reloc->robj; 1689 1689 track->tex_dirty = true; 1690 1690 break; ··· 1702 1702 return r; 1703 1703 } 1704 1704 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1705 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1705 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 1706 1706 tile_flags |= RADEON_COLOR_TILE_ENABLE; 1707 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1707 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 1708 1708 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 1709 1709 1710 1710 tmp = idx_value & ~(0x7 << 16); ··· 1772 1772 radeon_cs_dump_packet(p, pkt); 1773 1773 return r; 1774 1774 } 1775 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1775 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1776 1776 break; 1777 1777 case RADEON_PP_CNTL: 1778 1778 { ··· 1932 1932 radeon_cs_dump_packet(p, pkt); 1933 1933 return r; 1934 1934 } 1935 - ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); 1935 + ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset); 1936 1936 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 1937 1937 if (r) { 1938 1938 return r; ··· 1946 1946 radeon_cs_dump_packet(p, pkt); 1947 1947 return r; 1948 1948 } 1949 - ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); 1949 + ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset); 1950 1950 track->num_arrays = 1; 1951 1951 track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); 1952 1952
+10 -10
drivers/gpu/drm/radeon/r200.c
··· 185 185 track->zb.robj = reloc->robj; 186 186 track->zb.offset = idx_value; 187 187 track->zb_dirty = true; 188 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 188 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 189 189 break; 190 190 case RADEON_RB3D_COLOROFFSET: 191 191 r = radeon_cs_packet_next_reloc(p, &reloc, 0); ··· 198 198 track->cb[0].robj = reloc->robj; 199 199 track->cb[0].offset = idx_value; 200 200 track->cb_dirty = true; 201 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 201 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 202 202 break; 203 203 case R200_PP_TXOFFSET_0: 204 204 case R200_PP_TXOFFSET_1: ··· 215 215 return r; 216 216 } 217 217 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 218 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 218 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 219 219 tile_flags |= R200_TXO_MACRO_TILE; 220 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 220 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 221 221 tile_flags |= R200_TXO_MICRO_TILE; 222 222 223 223 tmp = idx_value & ~(0x7 << 2); 224 224 tmp |= tile_flags; 225 - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); 225 + ib[idx] = tmp + ((u32)reloc->gpu_offset); 226 226 } else 227 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 227 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 228 228 track->textures[i].robj = reloc->robj; 229 229 track->tex_dirty = true; 230 230 break; ··· 268 268 return r; 269 269 } 270 270 track->textures[i].cube_info[face - 1].offset = idx_value; 271 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 271 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 272 272 track->textures[i].cube_info[face - 1].robj = reloc->robj; 273 273 track->tex_dirty = true; 274 274 break; ··· 287 287 } 288 288 289 289 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 290 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 290 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 291 291 tile_flags |= 
RADEON_COLOR_TILE_ENABLE; 292 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 292 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 293 293 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 294 294 295 295 tmp = idx_value & ~(0x7 << 16); ··· 362 362 radeon_cs_dump_packet(p, pkt); 363 363 return r; 364 364 } 365 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 365 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 366 366 break; 367 367 case RADEON_PP_CNTL: 368 368 {
+16 -16
drivers/gpu/drm/radeon/r300.c
··· 640 640 track->cb[i].robj = reloc->robj; 641 641 track->cb[i].offset = idx_value; 642 642 track->cb_dirty = true; 643 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 643 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 644 644 break; 645 645 case R300_ZB_DEPTHOFFSET: 646 646 r = radeon_cs_packet_next_reloc(p, &reloc, 0); ··· 653 653 track->zb.robj = reloc->robj; 654 654 track->zb.offset = idx_value; 655 655 track->zb_dirty = true; 656 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 656 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 657 657 break; 658 658 case R300_TX_OFFSET_0: 659 659 case R300_TX_OFFSET_0+4: ··· 682 682 683 683 if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) { 684 684 ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ 685 - ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); 685 + ((idx_value & ~31) + (u32)reloc->gpu_offset); 686 686 } else { 687 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 687 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 688 688 tile_flags |= R300_TXO_MACRO_TILE; 689 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 689 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 690 690 tile_flags |= R300_TXO_MICRO_TILE; 691 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 691 + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) 692 692 tile_flags |= R300_TXO_MICRO_TILE_SQUARE; 693 693 694 - tmp = idx_value + ((u32)reloc->lobj.gpu_offset); 694 + tmp = idx_value + ((u32)reloc->gpu_offset); 695 695 tmp |= tile_flags; 696 696 ib[idx] = tmp; 697 697 } ··· 753 753 return r; 754 754 } 755 755 756 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 756 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 757 757 tile_flags |= R300_COLOR_TILE_ENABLE; 758 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 758 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 759 759 tile_flags |= R300_COLOR_MICROTILE_ENABLE; 760 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 760 + 
else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) 761 761 tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; 762 762 763 763 tmp = idx_value & ~(0x7 << 16); ··· 838 838 return r; 839 839 } 840 840 841 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 841 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 842 842 tile_flags |= R300_DEPTHMACROTILE_ENABLE; 843 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 843 + if (reloc->tiling_flags & RADEON_TILING_MICRO) 844 844 tile_flags |= R300_DEPTHMICROTILE_TILED; 845 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 845 + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) 846 846 tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; 847 847 848 848 tmp = idx_value & ~(0x7 << 16); ··· 1052 1052 radeon_cs_dump_packet(p, pkt); 1053 1053 return r; 1054 1054 } 1055 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1055 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1056 1056 break; 1057 1057 case 0x4e0c: 1058 1058 /* RB3D_COLOR_CHANNEL_MASK */ ··· 1097 1097 track->aa.robj = reloc->robj; 1098 1098 track->aa.offset = idx_value; 1099 1099 track->aa_dirty = true; 1100 - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1100 + ib[idx] = idx_value + ((u32)reloc->gpu_offset); 1101 1101 break; 1102 1102 case R300_RB3D_AARESOLVE_PITCH: 1103 1103 track->aa.pitch = idx_value & 0x3FFE; ··· 1162 1162 radeon_cs_dump_packet(p, pkt); 1163 1163 return r; 1164 1164 } 1165 - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); 1165 + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); 1166 1166 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 1167 1167 if (r) { 1168 1168 return r;
+55 -55
drivers/gpu/drm/radeon/r600_cs.c
··· 1022 1022 "0x%04X\n", reg); 1023 1023 return -EINVAL; 1024 1024 } 1025 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1025 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1026 1026 break; 1027 1027 case SQ_CONFIG: 1028 1028 track->sq_config = radeon_get_ib_value(p, idx); ··· 1043 1043 track->db_depth_info = radeon_get_ib_value(p, idx); 1044 1044 ib[idx] &= C_028010_ARRAY_MODE; 1045 1045 track->db_depth_info &= C_028010_ARRAY_MODE; 1046 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1046 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1047 1047 ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); 1048 1048 track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); 1049 1049 } else { ··· 1084 1084 } 1085 1085 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; 1086 1086 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; 1087 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1087 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1088 1088 track->vgt_strmout_bo[tmp] = reloc->robj; 1089 - track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; 1089 + track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset; 1090 1090 track->streamout_dirty = true; 1091 1091 break; 1092 1092 case VGT_STRMOUT_BUFFER_SIZE_0: ··· 1105 1105 "0x%04X\n", reg); 1106 1106 return -EINVAL; 1107 1107 } 1108 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1108 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1109 1109 break; 1110 1110 case R_028238_CB_TARGET_MASK: 1111 1111 track->cb_target_mask = radeon_get_ib_value(p, idx); ··· 1142 1142 } 1143 1143 tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4; 1144 1144 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 1145 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1145 + if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1146 1146 ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); 1147 1147 
track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); 1148 - } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 1148 + } else if (reloc->tiling_flags & RADEON_TILING_MICRO) { 1149 1149 ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); 1150 1150 track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); 1151 1151 } ··· 1214 1214 } 1215 1215 track->cb_color_frag_bo[tmp] = reloc->robj; 1216 1216 track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; 1217 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1217 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1218 1218 } 1219 1219 if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { 1220 1220 track->cb_dirty = true; ··· 1245 1245 } 1246 1246 track->cb_color_tile_bo[tmp] = reloc->robj; 1247 1247 track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; 1248 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1248 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1249 1249 } 1250 1250 if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { 1251 1251 track->cb_dirty = true; ··· 1281 1281 } 1282 1282 tmp = (reg - CB_COLOR0_BASE) / 4; 1283 1283 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; 1284 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1284 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1285 1285 track->cb_color_base_last[tmp] = ib[idx]; 1286 1286 track->cb_color_bo[tmp] = reloc->robj; 1287 - track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; 1287 + track->cb_color_bo_mc[tmp] = reloc->gpu_offset; 1288 1288 track->cb_dirty = true; 1289 1289 break; 1290 1290 case DB_DEPTH_BASE: ··· 1295 1295 return -EINVAL; 1296 1296 } 1297 1297 track->db_offset = radeon_get_ib_value(p, idx) << 8; 1298 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1298 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1299 1299 track->db_bo = reloc->robj; 1300 - 
track->db_bo_mc = reloc->lobj.gpu_offset; 1300 + track->db_bo_mc = reloc->gpu_offset; 1301 1301 track->db_dirty = true; 1302 1302 break; 1303 1303 case DB_HTILE_DATA_BASE: ··· 1308 1308 return -EINVAL; 1309 1309 } 1310 1310 track->htile_offset = radeon_get_ib_value(p, idx) << 8; 1311 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1311 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1312 1312 track->htile_bo = reloc->robj; 1313 1313 track->db_dirty = true; 1314 1314 break; ··· 1377 1377 "0x%04X\n", reg); 1378 1378 return -EINVAL; 1379 1379 } 1380 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1380 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1381 1381 break; 1382 1382 case SX_MEMORY_EXPORT_BASE: 1383 1383 r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); ··· 1386 1386 "0x%04X\n", reg); 1387 1387 return -EINVAL; 1388 1388 } 1389 - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1389 + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1390 1390 break; 1391 1391 case SX_MISC: 1392 1392 track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; ··· 1672 1672 return -EINVAL; 1673 1673 } 1674 1674 1675 - offset = reloc->lobj.gpu_offset + 1675 + offset = reloc->gpu_offset + 1676 1676 (idx_value & 0xfffffff0) + 1677 1677 ((u64)(tmp & 0xff) << 32); 1678 1678 ··· 1713 1713 return -EINVAL; 1714 1714 } 1715 1715 1716 - offset = reloc->lobj.gpu_offset + 1716 + offset = reloc->gpu_offset + 1717 1717 idx_value + 1718 1718 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1719 1719 ··· 1765 1765 return -EINVAL; 1766 1766 } 1767 1767 1768 - offset = reloc->lobj.gpu_offset + 1768 + offset = reloc->gpu_offset + 1769 1769 (radeon_get_ib_value(p, idx+1) & 0xfffffff0) + 1770 1770 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1771 1771 ··· 1805 1805 tmp = radeon_get_ib_value(p, idx) + 1806 1806 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1807 1807 1808 - offset = 
reloc->lobj.gpu_offset + tmp; 1808 + offset = reloc->gpu_offset + tmp; 1809 1809 1810 1810 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 1811 1811 dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", ··· 1835 1835 tmp = radeon_get_ib_value(p, idx+2) + 1836 1836 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); 1837 1837 1838 - offset = reloc->lobj.gpu_offset + tmp; 1838 + offset = reloc->gpu_offset + tmp; 1839 1839 1840 1840 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 1841 1841 dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", ··· 1861 1861 DRM_ERROR("bad SURFACE_SYNC\n"); 1862 1862 return -EINVAL; 1863 1863 } 1864 - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1864 + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1865 1865 } 1866 1866 break; 1867 1867 case PACKET3_EVENT_WRITE: ··· 1877 1877 DRM_ERROR("bad EVENT_WRITE\n"); 1878 1878 return -EINVAL; 1879 1879 } 1880 - offset = reloc->lobj.gpu_offset + 1880 + offset = reloc->gpu_offset + 1881 1881 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + 1882 1882 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1883 1883 ··· 1899 1899 return -EINVAL; 1900 1900 } 1901 1901 1902 - offset = reloc->lobj.gpu_offset + 1902 + offset = reloc->gpu_offset + 1903 1903 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 1904 1904 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1905 1905 ··· 1964 1964 DRM_ERROR("bad SET_RESOURCE\n"); 1965 1965 return -EINVAL; 1966 1966 } 1967 - base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1967 + base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1968 1968 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1969 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1969 + if (reloc->tiling_flags & RADEON_TILING_MACRO) 1970 1970 ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1971 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1971 + else if (reloc->tiling_flags & 
RADEON_TILING_MICRO) 1972 1972 ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 1973 1973 } 1974 1974 texture = reloc->robj; ··· 1978 1978 DRM_ERROR("bad SET_RESOURCE\n"); 1979 1979 return -EINVAL; 1980 1980 } 1981 - mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1981 + mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1982 1982 mipmap = reloc->robj; 1983 1983 r = r600_check_texture_resource(p, idx+(i*7)+1, 1984 1984 texture, mipmap, 1985 1985 base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), 1986 1986 mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), 1987 - reloc->lobj.tiling_flags); 1987 + reloc->tiling_flags); 1988 1988 if (r) 1989 1989 return r; 1990 1990 ib[idx+1+(i*7)+2] += base_offset; ··· 2008 2008 ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; 2009 2009 } 2010 2010 2011 - offset64 = reloc->lobj.gpu_offset + offset; 2011 + offset64 = reloc->gpu_offset + offset; 2012 2012 ib[idx+1+(i*8)+0] = offset64; 2013 2013 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | 2014 2014 (upper_32_bits(offset64) & 0xff); ··· 2118 2118 offset + 4, radeon_bo_size(reloc->robj)); 2119 2119 return -EINVAL; 2120 2120 } 2121 - ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 2121 + ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2122 2122 } 2123 2123 break; 2124 2124 case PACKET3_SURFACE_BASE_UPDATE: ··· 2151 2151 offset + 4, radeon_bo_size(reloc->robj)); 2152 2152 return -EINVAL; 2153 2153 } 2154 - offset += reloc->lobj.gpu_offset; 2154 + offset += reloc->gpu_offset; 2155 2155 ib[idx+1] = offset; 2156 2156 ib[idx+2] = upper_32_bits(offset) & 0xff; 2157 2157 } ··· 2170 2170 offset + 4, radeon_bo_size(reloc->robj)); 2171 2171 return -EINVAL; 2172 2172 } 2173 - offset += reloc->lobj.gpu_offset; 2173 + offset += reloc->gpu_offset; 2174 2174 ib[idx+3] = offset; 2175 2175 ib[idx+4] = upper_32_bits(offset) & 0xff; 2176 2176 } ··· 2199 2199 offset + 8, radeon_bo_size(reloc->robj)); 2200 2200 
return -EINVAL; 2201 2201 } 2202 - offset += reloc->lobj.gpu_offset; 2202 + offset += reloc->gpu_offset; 2203 2203 ib[idx+0] = offset; 2204 2204 ib[idx+1] = upper_32_bits(offset) & 0xff; 2205 2205 break; ··· 2224 2224 offset + 4, radeon_bo_size(reloc->robj)); 2225 2225 return -EINVAL; 2226 2226 } 2227 - offset += reloc->lobj.gpu_offset; 2227 + offset += reloc->gpu_offset; 2228 2228 ib[idx+1] = offset; 2229 2229 ib[idx+2] = upper_32_bits(offset) & 0xff; 2230 2230 } else { ··· 2248 2248 offset + 4, radeon_bo_size(reloc->robj)); 2249 2249 return -EINVAL; 2250 2250 } 2251 - offset += reloc->lobj.gpu_offset; 2251 + offset += reloc->gpu_offset; 2252 2252 ib[idx+3] = offset; 2253 2253 ib[idx+4] = upper_32_bits(offset) & 0xff; 2254 2254 } else { ··· 2505 2505 dst_offset = radeon_get_ib_value(p, idx+1); 2506 2506 dst_offset <<= 8; 2507 2507 2508 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2508 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2509 2509 p->idx += count + 5; 2510 2510 } else { 2511 2511 dst_offset = radeon_get_ib_value(p, idx+1); 2512 2512 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2513 2513 2514 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2515 - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2514 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2515 + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2516 2516 p->idx += count + 3; 2517 2517 } 2518 2518 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { ··· 2539 2539 /* tiled src, linear dst */ 2540 2540 src_offset = radeon_get_ib_value(p, idx+1); 2541 2541 src_offset <<= 8; 2542 - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 2542 + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2543 2543 2544 2544 dst_offset = radeon_get_ib_value(p, idx+5); 2545 2545 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; 2546 - ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2547 - 
ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2546 + ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2547 + ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2548 2548 } else { 2549 2549 /* linear src, tiled dst */ 2550 2550 src_offset = radeon_get_ib_value(p, idx+5); 2551 2551 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; 2552 - ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2553 - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2552 + ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2553 + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2554 2554 2555 2555 dst_offset = radeon_get_ib_value(p, idx+1); 2556 2556 dst_offset <<= 8; 2557 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2557 + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2558 2558 } 2559 2559 p->idx += 7; 2560 2560 } else { ··· 2564 2564 dst_offset = radeon_get_ib_value(p, idx+1); 2565 2565 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; 2566 2566 2567 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2568 - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2569 - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2570 - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2567 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2568 + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2569 + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2570 + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2571 2571 p->idx += 5; 2572 2572 } else { 2573 2573 src_offset = radeon_get_ib_value(p, idx+2); ··· 2575 2575 dst_offset = radeon_get_ib_value(p, idx+1); 2576 2576 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16; 2577 2577 2578 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2579 - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2580 - ib[idx+3] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2581 - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16; 2578 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2579 + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2580 + ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2581 + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16; 2582 2582 p->idx += 4; 2583 2583 } 2584 2584 } ··· 2610 2610 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2611 2611 return -EINVAL; 2612 2612 } 2613 - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2614 - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; 2613 + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2614 + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; 2615 2615 p->idx += 4; 2616 2616 break; 2617 2617 case DMA_PACKET_NOP:
+34 -27
drivers/gpu/drm/radeon/radeon.h
··· 363 363 void radeon_fence_process(struct radeon_device *rdev, int ring); 364 364 bool radeon_fence_signaled(struct radeon_fence *fence); 365 365 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); 366 - int radeon_fence_wait_locked(struct radeon_fence *fence); 367 - int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); 368 - int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); 366 + int radeon_fence_wait_next(struct radeon_device *rdev, int ring); 367 + int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); 369 368 int radeon_fence_wait_any(struct radeon_device *rdev, 370 369 struct radeon_fence **fences, 371 370 bool intr); ··· 456 457 /* Protected by gem.mutex */ 457 458 struct list_head list; 458 459 /* Protected by tbo.reserved */ 460 + u32 initial_domain; 459 461 u32 placements[3]; 460 462 struct ttm_placement placement; 461 463 struct ttm_buffer_object tbo; ··· 478 478 pid_t pid; 479 479 }; 480 480 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) 481 - 482 - struct radeon_bo_list { 483 - struct ttm_validate_buffer tv; 484 - struct radeon_bo *bo; 485 - uint64_t gpu_offset; 486 - bool written; 487 - unsigned domain; 488 - unsigned alt_domain; 489 - u32 tiling_flags; 490 - }; 491 481 492 482 int radeon_gem_debugfs_init(struct radeon_device *rdev); 493 483 ··· 795 805 unsigned ring_size; 796 806 unsigned ring_free_dw; 797 807 int count_dw; 798 - unsigned long last_activity; 799 - unsigned last_rptr; 808 + atomic_t last_rptr; 809 + atomic64_t last_activity; 800 810 uint64_t gpu_addr; 801 811 uint32_t align_mask; 802 812 uint32_t ptr_mask; ··· 848 858 #define R600_PTE_READABLE (1 << 5) 849 859 #define R600_PTE_WRITEABLE (1 << 6) 850 860 861 + struct radeon_vm_pt { 862 + struct radeon_bo *bo; 863 + uint64_t addr; 864 + }; 865 + 851 866 struct radeon_vm { 852 - struct list_head list; 853 867 struct list_head va; 854 868 unsigned id; 855 869 856 870 /* contains 
the page directory */ 857 - struct radeon_sa_bo *page_directory; 871 + struct radeon_bo *page_directory; 858 872 uint64_t pd_gpu_addr; 873 + unsigned max_pde_used; 859 874 860 875 /* array of page tables, one for each page directory entry */ 861 - struct radeon_sa_bo **page_tables; 876 + struct radeon_vm_pt *page_tables; 862 877 863 878 struct mutex mutex; 864 879 /* last fence for cs using this vm */ ··· 875 880 }; 876 881 877 882 struct radeon_vm_manager { 878 - struct mutex lock; 879 - struct list_head lru_vm; 880 883 struct radeon_fence *active[RADEON_NUM_VM]; 881 - struct radeon_sa_manager sa_manager; 882 884 uint32_t max_pfn; 883 885 /* number of VMIDs */ 884 886 unsigned nvm; ··· 978 986 struct radeon_cs_reloc { 979 987 struct drm_gem_object *gobj; 980 988 struct radeon_bo *robj; 981 - struct radeon_bo_list lobj; 989 + struct ttm_validate_buffer tv; 990 + uint64_t gpu_offset; 991 + unsigned domain; 992 + unsigned alt_domain; 993 + uint32_t tiling_flags; 982 994 uint32_t handle; 983 - uint32_t flags; 984 995 }; 985 996 986 997 struct radeon_cs_chunk { ··· 1007 1012 unsigned nrelocs; 1008 1013 struct radeon_cs_reloc *relocs; 1009 1014 struct radeon_cs_reloc **relocs_ptr; 1015 + struct radeon_cs_reloc *vm_bos; 1010 1016 struct list_head validated; 1011 1017 unsigned dma_reloc_idx; 1012 1018 /* indices of various chunks */ ··· 1631 1635 1632 1636 struct radeon_vce { 1633 1637 struct radeon_bo *vcpu_bo; 1634 - void *cpu_addr; 1635 1638 uint64_t gpu_addr; 1636 1639 unsigned fw_version; 1637 1640 unsigned fb_version; ··· 2112 2117 struct drm_file *filp); 2113 2118 int radeon_gem_va_ioctl(struct drm_device *dev, void *data, 2114 2119 struct drm_file *filp); 2120 + int radeon_gem_op_ioctl(struct drm_device *dev, void *data, 2121 + struct drm_file *filp); 2115 2122 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 2116 2123 int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, 2117 2124 struct drm_file *filp); ··· 2304 2307 
/* virtual memory */ 2305 2308 struct radeon_vm_manager vm_manager; 2306 2309 struct mutex gpu_clock_mutex; 2310 + /* memory stats */ 2311 + atomic64_t vram_usage; 2312 + atomic64_t gtt_usage; 2313 + atomic64_t num_bytes_moved; 2307 2314 /* ACPI interface */ 2308 2315 struct radeon_atif atif; 2309 2316 struct radeon_atcs atcs; ··· 2795 2794 */ 2796 2795 int radeon_vm_manager_init(struct radeon_device *rdev); 2797 2796 void radeon_vm_manager_fini(struct radeon_device *rdev); 2798 - void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); 2797 + int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); 2799 2798 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); 2800 - int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); 2801 - void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); 2799 + struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, 2800 + struct radeon_vm *vm, 2801 + struct list_head *head); 2802 2802 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, 2803 2803 struct radeon_vm *vm, int ring); 2804 + void radeon_vm_flush(struct radeon_device *rdev, 2805 + struct radeon_vm *vm, 2806 + int ring); 2804 2807 void radeon_vm_fence(struct radeon_device *rdev, 2805 2808 struct radeon_vm *vm, 2806 2809 struct radeon_fence *fence); 2807 2810 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); 2811 + int radeon_vm_update_page_directory(struct radeon_device *rdev, 2812 + struct radeon_vm *vm); 2808 2813 int radeon_vm_bo_update(struct radeon_device *rdev, 2809 2814 struct radeon_vm *vm, 2810 2815 struct radeon_bo *bo,
+118 -38
drivers/gpu/drm/radeon/radeon_cs.c
··· 24 24 * Authors: 25 25 * Jerome Glisse <glisse@freedesktop.org> 26 26 */ 27 + #include <linux/list_sort.h> 27 28 #include <drm/drmP.h> 28 29 #include <drm/radeon_drm.h> 29 30 #include "radeon_reg.h" 30 31 #include "radeon.h" 31 32 #include "radeon_trace.h" 32 33 34 + #define RADEON_CS_MAX_PRIORITY 32u 35 + #define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) 36 + 37 + /* This is based on the bucket sort with O(n) time complexity. 38 + * An item with priority "i" is added to bucket[i]. The lists are then 39 + * concatenated in descending order. 40 + */ 41 + struct radeon_cs_buckets { 42 + struct list_head bucket[RADEON_CS_NUM_BUCKETS]; 43 + }; 44 + 45 + static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) 46 + { 47 + unsigned i; 48 + 49 + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) 50 + INIT_LIST_HEAD(&b->bucket[i]); 51 + } 52 + 53 + static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, 54 + struct list_head *item, unsigned priority) 55 + { 56 + /* Since buffers which appear sooner in the relocation list are 57 + * likely to be used more often than buffers which appear later 58 + * in the list, the sort mustn't change the ordering of buffers 59 + * with the same priority, i.e. it must be stable. 60 + */ 61 + list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); 62 + } 63 + 64 + static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, 65 + struct list_head *out_list) 66 + { 67 + unsigned i; 68 + 69 + /* Connect the sorted buckets in the output list. 
*/ 70 + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { 71 + list_splice(&b->bucket[i], out_list); 72 + } 73 + } 74 + 33 75 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) 34 76 { 35 77 struct drm_device *ddev = p->rdev->ddev; 36 78 struct radeon_cs_chunk *chunk; 79 + struct radeon_cs_buckets buckets; 37 80 unsigned i, j; 38 81 bool duplicate; 39 82 ··· 95 52 if (p->relocs == NULL) { 96 53 return -ENOMEM; 97 54 } 55 + 56 + radeon_cs_buckets_init(&buckets); 57 + 98 58 for (i = 0; i < p->nrelocs; i++) { 99 59 struct drm_radeon_cs_reloc *r; 60 + unsigned priority; 100 61 101 62 duplicate = false; 102 63 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; ··· 125 78 } 126 79 p->relocs_ptr[i] = &p->relocs[i]; 127 80 p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); 128 - p->relocs[i].lobj.bo = p->relocs[i].robj; 129 - p->relocs[i].lobj.written = !!r->write_domain; 81 + 82 + /* The userspace buffer priorities are from 0 to 15. A higher 83 + * number means the buffer is more important. 84 + * Also, the buffers used for write have a higher priority than 85 + * the buffers used for read only, which doubles the range 86 + * to 0 to 31. 32 is reserved for the kernel driver. 87 + */ 88 + priority = (r->flags & 0xf) * 2 + !!r->write_domain; 130 89 131 90 /* the first reloc of an UVD job is the msg and that must be in 132 91 VRAM, also but everything into VRAM on AGP cards to avoid ··· 140 87 if (p->ring == R600_RING_TYPE_UVD_INDEX && 141 88 (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { 142 89 /* TODO: is this still needed for NI+ ? */ 143 - p->relocs[i].lobj.domain = 90 + p->relocs[i].domain = 144 91 RADEON_GEM_DOMAIN_VRAM; 145 92 146 - p->relocs[i].lobj.alt_domain = 93 + p->relocs[i].alt_domain = 147 94 RADEON_GEM_DOMAIN_VRAM; 148 95 96 + /* prioritize this over any other relocation */ 97 + priority = RADEON_CS_MAX_PRIORITY; 149 98 } else { 150 99 uint32_t domain = r->write_domain ? 
151 100 r->write_domain : r->read_domains; 152 101 153 - p->relocs[i].lobj.domain = domain; 102 + p->relocs[i].domain = domain; 154 103 if (domain == RADEON_GEM_DOMAIN_VRAM) 155 104 domain |= RADEON_GEM_DOMAIN_GTT; 156 - p->relocs[i].lobj.alt_domain = domain; 105 + p->relocs[i].alt_domain = domain; 157 106 } 158 107 159 - p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; 108 + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; 160 109 p->relocs[i].handle = r->handle; 161 110 162 - radeon_bo_list_add_object(&p->relocs[i].lobj, 163 - &p->validated); 111 + radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, 112 + priority); 164 113 } 165 - return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); 114 + 115 + radeon_cs_buckets_get_list(&buckets, &p->validated); 116 + 117 + if (p->cs_flags & RADEON_CS_USE_VM) 118 + p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, 119 + &p->validated); 120 + 121 + return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); 166 122 } 167 123 168 124 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) ··· 352 290 return 0; 353 291 } 354 292 293 + static int cmp_size_smaller_first(void *priv, struct list_head *a, 294 + struct list_head *b) 295 + { 296 + struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head); 297 + struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head); 298 + 299 + /* Sort A before B if A is smaller. */ 300 + return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; 301 + } 302 + 355 303 /** 356 304 * cs_parser_fini() - clean parser states 357 305 * @parser: parser structure holding parsing context. ··· 375 303 unsigned i; 376 304 377 305 if (!error) { 306 + /* Sort the buffer list from the smallest to largest buffer, 307 + * which affects the order of buffers in the LRU list. 
308 + * This assures that the smallest buffers are added first 309 + * to the LRU list, so they are likely to be later evicted 310 + * first, instead of large buffers whose eviction is more 311 + * expensive. 312 + * 313 + * This slightly lowers the number of bytes moved by TTM 314 + * per frame under memory pressure. 315 + */ 316 + list_sort(NULL, &parser->validated, cmp_size_smaller_first); 317 + 378 318 ttm_eu_fence_buffer_objects(&parser->ticket, 379 319 &parser->validated, 380 320 parser->ib.fence); ··· 404 320 kfree(parser->track); 405 321 kfree(parser->relocs); 406 322 kfree(parser->relocs_ptr); 323 + kfree(parser->vm_bos); 407 324 for (i = 0; i < parser->nchunks; i++) 408 325 drm_free_large(parser->chunks[i].kdata); 409 326 kfree(parser->chunks); ··· 444 359 return r; 445 360 } 446 361 447 - static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, 362 + static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, 448 363 struct radeon_vm *vm) 449 364 { 450 - struct radeon_device *rdev = parser->rdev; 451 - struct radeon_bo_list *lobj; 452 - struct radeon_bo *bo; 453 - int r; 365 + struct radeon_device *rdev = p->rdev; 366 + int i, r; 454 367 455 - r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); 456 - if (r) { 368 + r = radeon_vm_update_page_directory(rdev, vm); 369 + if (r) 457 370 return r; 458 - } 459 - list_for_each_entry(lobj, &parser->validated, tv.head) { 460 - bo = lobj->bo; 461 - r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); 462 - if (r) { 371 + 372 + r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, 373 + &rdev->ring_tmp_bo.bo->tbo.mem); 374 + if (r) 375 + return r; 376 + 377 + for (i = 0; i < p->nrelocs; i++) { 378 + struct radeon_bo *bo; 379 + 380 + /* ignore duplicates */ 381 + if (p->relocs_ptr[i] != &p->relocs[i]) 382 + continue; 383 + 384 + bo = p->relocs[i].robj; 385 + r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); 386 + if (r) 463 387 return r; 464 - } 465 388 
} 466 389 return 0; 467 390 } ··· 501 408 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 502 409 radeon_uvd_note_usage(rdev); 503 410 504 - mutex_lock(&rdev->vm_manager.lock); 505 411 mutex_lock(&vm->mutex); 506 - r = radeon_vm_alloc_pt(rdev, vm); 507 - if (r) { 508 - goto out; 509 - } 510 412 r = radeon_bo_vm_update_pte(parser, vm); 511 413 if (r) { 512 414 goto out; 513 415 } 514 416 radeon_cs_sync_rings(parser); 515 417 radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); 516 - radeon_semaphore_sync_to(parser->ib.semaphore, 517 - radeon_vm_grab_id(rdev, vm, parser->ring)); 518 418 519 419 if ((rdev->family >= CHIP_TAHITI) && 520 420 (parser->chunk_const_ib_idx != -1)) { ··· 516 430 r = radeon_ib_schedule(rdev, &parser->ib, NULL); 517 431 } 518 432 519 - if (!r) { 520 - radeon_vm_fence(rdev, vm, parser->ib.fence); 521 - } 522 - 523 433 out: 524 - radeon_vm_add_to_lru(rdev, vm); 525 434 mutex_unlock(&vm->mutex); 526 - mutex_unlock(&rdev->vm_manager.lock); 527 435 return r; 528 436 } 529 437 ··· 785 705 /* FIXME: we assume reloc size is 4 dwords */ 786 706 if (nomm) { 787 707 *cs_reloc = p->relocs; 788 - (*cs_reloc)->lobj.gpu_offset = 708 + (*cs_reloc)->gpu_offset = 789 709 (u64)relocs_chunk->kdata[idx + 3] << 32; 790 - (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; 710 + (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0]; 791 711 } else 792 712 *cs_reloc = p->relocs_ptr[(idx / 4)]; 793 713 return 0;
+2 -6
drivers/gpu/drm/radeon/radeon_device.c
··· 1191 1191 r = radeon_gem_init(rdev); 1192 1192 if (r) 1193 1193 return r; 1194 - /* initialize vm here */ 1195 - mutex_init(&rdev->vm_manager.lock); 1194 + 1196 1195 /* Adjust VM size here. 1197 1196 * Currently set to 4GB ((1 << 20) 4k pages). 1198 1197 * Max GPUVM size for cayman and SI is 40 bits. 1199 1198 */ 1200 1199 rdev->vm_manager.max_pfn = 1 << 20; 1201 - INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); 1202 1200 1203 1201 /* Set asic functions */ 1204 1202 r = radeon_asic_init(rdev); ··· 1443 1445 /* evict vram memory */ 1444 1446 radeon_bo_evict_vram(rdev); 1445 1447 1446 - mutex_lock(&rdev->ring_lock); 1447 1448 /* wait for gpu to finish processing current batch */ 1448 1449 for (i = 0; i < RADEON_NUM_RINGS; i++) { 1449 - r = radeon_fence_wait_empty_locked(rdev, i); 1450 + r = radeon_fence_wait_empty(rdev, i); 1450 1451 if (r) { 1451 1452 /* delay GPU reset to resume */ 1452 1453 force_completion = true; ··· 1454 1457 if (force_completion) { 1455 1458 radeon_fence_driver_force_completion(rdev); 1456 1459 } 1457 - mutex_unlock(&rdev->ring_lock); 1458 1460 1459 1461 radeon_save_bios_scratch_regs(rdev); 1460 1462
+2 -1
drivers/gpu/drm/radeon/radeon_drv.c
··· 79 79 * 2.35.0 - Add CIK macrotile mode array query 80 80 * 2.36.0 - Fix CIK DCE tiling setup 81 81 * 2.37.0 - allow GS ring setup on r6xx/r7xx 82 + * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN) 82 83 */ 83 84 #define KMS_DRIVER_MAJOR 2 84 - #define KMS_DRIVER_MINOR 37 85 + #define KMS_DRIVER_MINOR 38 85 86 #define KMS_DRIVER_PATCHLEVEL 0 86 87 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 87 88 int radeon_driver_unload_kms(struct drm_device *dev);
+10 -49
drivers/gpu/drm/radeon/radeon_fence.c
··· 288 288 * @rdev: radeon device pointer 289 289 * @target_seq: sequence number(s) we want to wait for 290 290 * @intr: use interruptable sleep 291 - * @lock_ring: whether the ring should be locked or not 292 291 * 293 292 * Wait for the requested sequence number(s) to be written by any ring 294 293 * (all asics). Sequnce number array is indexed by ring id. ··· 298 299 * -EDEADLK is returned when a GPU lockup has been detected. 299 300 */ 300 301 static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, 301 - bool intr, bool lock_ring) 302 + bool intr) 302 303 { 303 304 uint64_t last_seq[RADEON_NUM_RINGS]; 304 305 bool signaled; ··· 357 358 if (i != RADEON_NUM_RINGS) 358 359 continue; 359 360 360 - if (lock_ring) 361 - mutex_lock(&rdev->ring_lock); 362 - 363 361 for (i = 0; i < RADEON_NUM_RINGS; ++i) { 364 362 if (!target_seq[i]) 365 363 continue; ··· 374 378 375 379 /* remember that we need an reset */ 376 380 rdev->needs_reset = true; 377 - if (lock_ring) 378 - mutex_unlock(&rdev->ring_lock); 379 381 wake_up_all(&rdev->fence_queue); 380 382 return -EDEADLK; 381 383 } 382 - 383 - if (lock_ring) 384 - mutex_unlock(&rdev->ring_lock); 385 384 } 386 385 } 387 386 return 0; ··· 407 416 if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) 408 417 return 0; 409 418 410 - r = radeon_fence_wait_seq(fence->rdev, seq, intr, true); 419 + r = radeon_fence_wait_seq(fence->rdev, seq, intr); 411 420 if (r) 412 421 return r; 413 422 ··· 455 464 if (num_rings == 0) 456 465 return -ENOENT; 457 466 458 - r = radeon_fence_wait_seq(rdev, seq, intr, true); 467 + r = radeon_fence_wait_seq(rdev, seq, intr); 459 468 if (r) { 460 469 return r; 461 470 } ··· 463 472 } 464 473 465 474 /** 466 - * radeon_fence_wait_locked - wait for a fence to signal 467 - * 468 - * @fence: radeon fence object 469 - * 470 - * Wait for the requested fence to signal (all asics). 471 - * Returns 0 if the fence has passed, error for all other cases. 
472 - */ 473 - int radeon_fence_wait_locked(struct radeon_fence *fence) 474 - { 475 - uint64_t seq[RADEON_NUM_RINGS] = {}; 476 - int r; 477 - 478 - if (fence == NULL) { 479 - WARN(1, "Querying an invalid fence : %p !\n", fence); 480 - return -EINVAL; 481 - } 482 - 483 - seq[fence->ring] = fence->seq; 484 - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) 485 - return 0; 486 - 487 - r = radeon_fence_wait_seq(fence->rdev, seq, false, false); 488 - if (r) 489 - return r; 490 - 491 - fence->seq = RADEON_FENCE_SIGNALED_SEQ; 492 - return 0; 493 - } 494 - 495 - /** 496 - * radeon_fence_wait_next_locked - wait for the next fence to signal 475 + * radeon_fence_wait_next - wait for the next fence to signal 497 476 * 498 477 * @rdev: radeon device pointer 499 478 * @ring: ring index the fence is associated with ··· 472 511 * Returns 0 if the next fence has passed, error for all other cases. 473 512 * Caller must hold ring lock. 474 513 */ 475 - int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) 514 + int radeon_fence_wait_next(struct radeon_device *rdev, int ring) 476 515 { 477 516 uint64_t seq[RADEON_NUM_RINGS] = {}; 478 517 ··· 482 521 already the last emited fence */ 483 522 return -ENOENT; 484 523 } 485 - return radeon_fence_wait_seq(rdev, seq, false, false); 524 + return radeon_fence_wait_seq(rdev, seq, false); 486 525 } 487 526 488 527 /** 489 - * radeon_fence_wait_empty_locked - wait for all fences to signal 528 + * radeon_fence_wait_empty - wait for all fences to signal 490 529 * 491 530 * @rdev: radeon device pointer 492 531 * @ring: ring index the fence is associated with ··· 495 534 * Returns 0 if the fences have passed, error for all other cases. 496 535 * Caller must hold ring lock. 
497 536 */ 498 - int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) 537 + int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) 499 538 { 500 539 uint64_t seq[RADEON_NUM_RINGS] = {}; 501 540 int r; ··· 504 543 if (!seq[ring]) 505 544 return 0; 506 545 507 - r = radeon_fence_wait_seq(rdev, seq, false, false); 546 + r = radeon_fence_wait_seq(rdev, seq, false); 508 547 if (r) { 509 548 if (r == -EDEADLK) 510 549 return -EDEADLK; ··· 755 794 for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { 756 795 if (!rdev->fence_drv[ring].initialized) 757 796 continue; 758 - r = radeon_fence_wait_empty_locked(rdev, ring); 797 + r = radeon_fence_wait_empty(rdev, ring); 759 798 if (r) { 760 799 /* no need to trigger GPU reset as we are unloading */ 761 800 radeon_fence_driver_force_completion(rdev);
-958
drivers/gpu/drm/radeon/radeon_gart.c
··· 28 28 #include <drm/drmP.h> 29 29 #include <drm/radeon_drm.h> 30 30 #include "radeon.h" 31 - #include "radeon_reg.h" 32 - #include "radeon_trace.h" 33 31 34 32 /* 35 33 * GART ··· 391 393 rdev->gart.pages_addr = NULL; 392 394 393 395 radeon_dummy_page_fini(rdev); 394 - } 395 - 396 - /* 397 - * GPUVM 398 - * GPUVM is similar to the legacy gart on older asics, however 399 - * rather than there being a single global gart table 400 - * for the entire GPU, there are multiple VM page tables active 401 - * at any given time. The VM page tables can contain a mix 402 - * vram pages and system memory pages and system memory pages 403 - * can be mapped as snooped (cached system pages) or unsnooped 404 - * (uncached system pages). 405 - * Each VM has an ID associated with it and there is a page table 406 - * associated with each VMID. When execting a command buffer, 407 - * the kernel tells the the ring what VMID to use for that command 408 - * buffer. VMIDs are allocated dynamically as commands are submitted. 409 - * The userspace drivers maintain their own address space and the kernel 410 - * sets up their pages tables accordingly when they submit their 411 - * command buffers and a VMID is assigned. 412 - * Cayman/Trinity support up to 8 active VMs at any given time; 413 - * SI supports 16. 414 - */ 415 - 416 - /* 417 - * vm helpers 418 - * 419 - * TODO bind a default page at vm initialization for default address 420 - */ 421 - 422 - /** 423 - * radeon_vm_num_pde - return the number of page directory entries 424 - * 425 - * @rdev: radeon_device pointer 426 - * 427 - * Calculate the number of page directory entries (cayman+). 
428 - */ 429 - static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) 430 - { 431 - return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; 432 - } 433 - 434 - /** 435 - * radeon_vm_directory_size - returns the size of the page directory in bytes 436 - * 437 - * @rdev: radeon_device pointer 438 - * 439 - * Calculate the size of the page directory in bytes (cayman+). 440 - */ 441 - static unsigned radeon_vm_directory_size(struct radeon_device *rdev) 442 - { 443 - return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); 444 - } 445 - 446 - /** 447 - * radeon_vm_manager_init - init the vm manager 448 - * 449 - * @rdev: radeon_device pointer 450 - * 451 - * Init the vm manager (cayman+). 452 - * Returns 0 for success, error for failure. 453 - */ 454 - int radeon_vm_manager_init(struct radeon_device *rdev) 455 - { 456 - struct radeon_vm *vm; 457 - struct radeon_bo_va *bo_va; 458 - int r; 459 - unsigned size; 460 - 461 - if (!rdev->vm_manager.enabled) { 462 - /* allocate enough for 2 full VM pts */ 463 - size = radeon_vm_directory_size(rdev); 464 - size += rdev->vm_manager.max_pfn * 8; 465 - size *= 2; 466 - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, 467 - RADEON_GPU_PAGE_ALIGN(size), 468 - RADEON_VM_PTB_ALIGN_SIZE, 469 - RADEON_GEM_DOMAIN_VRAM); 470 - if (r) { 471 - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", 472 - (rdev->vm_manager.max_pfn * 8) >> 10); 473 - return r; 474 - } 475 - 476 - r = radeon_asic_vm_init(rdev); 477 - if (r) 478 - return r; 479 - 480 - rdev->vm_manager.enabled = true; 481 - 482 - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); 483 - if (r) 484 - return r; 485 - } 486 - 487 - /* restore page table */ 488 - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { 489 - if (vm->page_directory == NULL) 490 - continue; 491 - 492 - list_for_each_entry(bo_va, &vm->va, vm_list) { 493 - bo_va->valid = false; 494 - } 495 - } 496 - return 0; 497 - } 498 - 499 - /** 500 - * radeon_vm_free_pt 
- free the page table for a specific vm 501 - * 502 - * @rdev: radeon_device pointer 503 - * @vm: vm to unbind 504 - * 505 - * Free the page table of a specific vm (cayman+). 506 - * 507 - * Global and local mutex must be lock! 508 - */ 509 - static void radeon_vm_free_pt(struct radeon_device *rdev, 510 - struct radeon_vm *vm) 511 - { 512 - struct radeon_bo_va *bo_va; 513 - int i; 514 - 515 - if (!vm->page_directory) 516 - return; 517 - 518 - list_del_init(&vm->list); 519 - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); 520 - 521 - list_for_each_entry(bo_va, &vm->va, vm_list) { 522 - bo_va->valid = false; 523 - } 524 - 525 - if (vm->page_tables == NULL) 526 - return; 527 - 528 - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) 529 - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); 530 - 531 - kfree(vm->page_tables); 532 - } 533 - 534 - /** 535 - * radeon_vm_manager_fini - tear down the vm manager 536 - * 537 - * @rdev: radeon_device pointer 538 - * 539 - * Tear down the VM manager (cayman+). 
540 - */ 541 - void radeon_vm_manager_fini(struct radeon_device *rdev) 542 - { 543 - struct radeon_vm *vm, *tmp; 544 - int i; 545 - 546 - if (!rdev->vm_manager.enabled) 547 - return; 548 - 549 - mutex_lock(&rdev->vm_manager.lock); 550 - /* free all allocated page tables */ 551 - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { 552 - mutex_lock(&vm->mutex); 553 - radeon_vm_free_pt(rdev, vm); 554 - mutex_unlock(&vm->mutex); 555 - } 556 - for (i = 0; i < RADEON_NUM_VM; ++i) { 557 - radeon_fence_unref(&rdev->vm_manager.active[i]); 558 - } 559 - radeon_asic_vm_fini(rdev); 560 - mutex_unlock(&rdev->vm_manager.lock); 561 - 562 - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); 563 - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); 564 - rdev->vm_manager.enabled = false; 565 - } 566 - 567 - /** 568 - * radeon_vm_evict - evict page table to make room for new one 569 - * 570 - * @rdev: radeon_device pointer 571 - * @vm: VM we want to allocate something for 572 - * 573 - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). 574 - * Returns 0 for success, -ENOMEM for failure. 575 - * 576 - * Global and local mutex must be locked! 577 - */ 578 - static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) 579 - { 580 - struct radeon_vm *vm_evict; 581 - 582 - if (list_empty(&rdev->vm_manager.lru_vm)) 583 - return -ENOMEM; 584 - 585 - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, 586 - struct radeon_vm, list); 587 - if (vm_evict == vm) 588 - return -ENOMEM; 589 - 590 - mutex_lock(&vm_evict->mutex); 591 - radeon_vm_free_pt(rdev, vm_evict); 592 - mutex_unlock(&vm_evict->mutex); 593 - return 0; 594 - } 595 - 596 - /** 597 - * radeon_vm_alloc_pt - allocates a page table for a VM 598 - * 599 - * @rdev: radeon_device pointer 600 - * @vm: vm to bind 601 - * 602 - * Allocate a page table for the requested vm (cayman+). 603 - * Returns 0 for success, error for failure. 
604 - * 605 - * Global and local mutex must be locked! 606 - */ 607 - int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) 608 - { 609 - unsigned pd_size, pd_entries, pts_size; 610 - struct radeon_ib ib; 611 - int r; 612 - 613 - if (vm == NULL) { 614 - return -EINVAL; 615 - } 616 - 617 - if (vm->page_directory != NULL) { 618 - return 0; 619 - } 620 - 621 - pd_size = radeon_vm_directory_size(rdev); 622 - pd_entries = radeon_vm_num_pdes(rdev); 623 - 624 - retry: 625 - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, 626 - &vm->page_directory, pd_size, 627 - RADEON_VM_PTB_ALIGN_SIZE, false); 628 - if (r == -ENOMEM) { 629 - r = radeon_vm_evict(rdev, vm); 630 - if (r) 631 - return r; 632 - goto retry; 633 - 634 - } else if (r) { 635 - return r; 636 - } 637 - 638 - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); 639 - 640 - /* Initially clear the page directory */ 641 - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, 642 - NULL, pd_entries * 2 + 64); 643 - if (r) { 644 - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); 645 - return r; 646 - } 647 - 648 - ib.length_dw = 0; 649 - 650 - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, 651 - 0, pd_entries, 0, 0); 652 - 653 - radeon_semaphore_sync_to(ib.semaphore, vm->fence); 654 - r = radeon_ib_schedule(rdev, &ib, NULL); 655 - if (r) { 656 - radeon_ib_free(rdev, &ib); 657 - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); 658 - return r; 659 - } 660 - radeon_fence_unref(&vm->fence); 661 - vm->fence = radeon_fence_ref(ib.fence); 662 - radeon_ib_free(rdev, &ib); 663 - radeon_fence_unref(&vm->last_flush); 664 - 665 - /* allocate page table array */ 666 - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); 667 - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); 668 - 669 - if (vm->page_tables == NULL) { 670 - DRM_ERROR("Cannot allocate memory for page table array\n"); 671 - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); 672 - return 
-ENOMEM; 673 - } 674 - 675 - return 0; 676 - } 677 - 678 - /** 679 - * radeon_vm_add_to_lru - add VMs page table to LRU list 680 - * 681 - * @rdev: radeon_device pointer 682 - * @vm: vm to add to LRU 683 - * 684 - * Add the allocated page table to the LRU list (cayman+). 685 - * 686 - * Global mutex must be locked! 687 - */ 688 - void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) 689 - { 690 - list_del_init(&vm->list); 691 - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); 692 - } 693 - 694 - /** 695 - * radeon_vm_grab_id - allocate the next free VMID 696 - * 697 - * @rdev: radeon_device pointer 698 - * @vm: vm to allocate id for 699 - * @ring: ring we want to submit job to 700 - * 701 - * Allocate an id for the vm (cayman+). 702 - * Returns the fence we need to sync to (if any). 703 - * 704 - * Global and local mutex must be locked! 705 - */ 706 - struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, 707 - struct radeon_vm *vm, int ring) 708 - { 709 - struct radeon_fence *best[RADEON_NUM_RINGS] = {}; 710 - unsigned choices[2] = {}; 711 - unsigned i; 712 - 713 - /* check if the id is still valid */ 714 - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) 715 - return NULL; 716 - 717 - /* we definately need to flush */ 718 - radeon_fence_unref(&vm->last_flush); 719 - 720 - /* skip over VMID 0, since it is the system VM */ 721 - for (i = 1; i < rdev->vm_manager.nvm; ++i) { 722 - struct radeon_fence *fence = rdev->vm_manager.active[i]; 723 - 724 - if (fence == NULL) { 725 - /* found a free one */ 726 - vm->id = i; 727 - trace_radeon_vm_grab_id(vm->id, ring); 728 - return NULL; 729 - } 730 - 731 - if (radeon_fence_is_earlier(fence, best[fence->ring])) { 732 - best[fence->ring] = fence; 733 - choices[fence->ring == ring ? 
0 : 1] = i; 734 - } 735 - } 736 - 737 - for (i = 0; i < 2; ++i) { 738 - if (choices[i]) { 739 - vm->id = choices[i]; 740 - trace_radeon_vm_grab_id(vm->id, ring); 741 - return rdev->vm_manager.active[choices[i]]; 742 - } 743 - } 744 - 745 - /* should never happen */ 746 - BUG(); 747 - return NULL; 748 - } 749 - 750 - /** 751 - * radeon_vm_fence - remember fence for vm 752 - * 753 - * @rdev: radeon_device pointer 754 - * @vm: vm we want to fence 755 - * @fence: fence to remember 756 - * 757 - * Fence the vm (cayman+). 758 - * Set the fence used to protect page table and id. 759 - * 760 - * Global and local mutex must be locked! 761 - */ 762 - void radeon_vm_fence(struct radeon_device *rdev, 763 - struct radeon_vm *vm, 764 - struct radeon_fence *fence) 765 - { 766 - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); 767 - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); 768 - 769 - radeon_fence_unref(&vm->fence); 770 - vm->fence = radeon_fence_ref(fence); 771 - 772 - radeon_fence_unref(&vm->last_id_use); 773 - vm->last_id_use = radeon_fence_ref(fence); 774 - } 775 - 776 - /** 777 - * radeon_vm_bo_find - find the bo_va for a specific vm & bo 778 - * 779 - * @vm: requested vm 780 - * @bo: requested buffer object 781 - * 782 - * Find @bo inside the requested vm (cayman+). 783 - * Search inside the @bos vm list for the requested vm 784 - * Returns the found bo_va or NULL if none is found 785 - * 786 - * Object has to be reserved! 787 - */ 788 - struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, 789 - struct radeon_bo *bo) 790 - { 791 - struct radeon_bo_va *bo_va; 792 - 793 - list_for_each_entry(bo_va, &bo->va, bo_list) { 794 - if (bo_va->vm == vm) { 795 - return bo_va; 796 - } 797 - } 798 - return NULL; 799 - } 800 - 801 - /** 802 - * radeon_vm_bo_add - add a bo to a specific vm 803 - * 804 - * @rdev: radeon_device pointer 805 - * @vm: requested vm 806 - * @bo: radeon buffer object 807 - * 808 - * Add @bo into the requested vm (cayman+). 
809 - * Add @bo to the list of bos associated with the vm 810 - * Returns newly added bo_va or NULL for failure 811 - * 812 - * Object has to be reserved! 813 - */ 814 - struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, 815 - struct radeon_vm *vm, 816 - struct radeon_bo *bo) 817 - { 818 - struct radeon_bo_va *bo_va; 819 - 820 - bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 821 - if (bo_va == NULL) { 822 - return NULL; 823 - } 824 - bo_va->vm = vm; 825 - bo_va->bo = bo; 826 - bo_va->soffset = 0; 827 - bo_va->eoffset = 0; 828 - bo_va->flags = 0; 829 - bo_va->valid = false; 830 - bo_va->ref_count = 1; 831 - INIT_LIST_HEAD(&bo_va->bo_list); 832 - INIT_LIST_HEAD(&bo_va->vm_list); 833 - 834 - mutex_lock(&vm->mutex); 835 - list_add(&bo_va->vm_list, &vm->va); 836 - list_add_tail(&bo_va->bo_list, &bo->va); 837 - mutex_unlock(&vm->mutex); 838 - 839 - return bo_va; 840 - } 841 - 842 - /** 843 - * radeon_vm_bo_set_addr - set bos virtual address inside a vm 844 - * 845 - * @rdev: radeon_device pointer 846 - * @bo_va: bo_va to store the address 847 - * @soffset: requested offset of the buffer in the VM address space 848 - * @flags: attributes of pages (read/write/valid/etc.) 849 - * 850 - * Set offset of @bo_va (cayman+). 851 - * Validate and set the offset requested within the vm address space. 852 - * Returns 0 for success, error for failure. 853 - * 854 - * Object has to be reserved! 
855 - */ 856 - int radeon_vm_bo_set_addr(struct radeon_device *rdev, 857 - struct radeon_bo_va *bo_va, 858 - uint64_t soffset, 859 - uint32_t flags) 860 - { 861 - uint64_t size = radeon_bo_size(bo_va->bo); 862 - uint64_t eoffset, last_offset = 0; 863 - struct radeon_vm *vm = bo_va->vm; 864 - struct radeon_bo_va *tmp; 865 - struct list_head *head; 866 - unsigned last_pfn; 867 - 868 - if (soffset) { 869 - /* make sure object fit at this offset */ 870 - eoffset = soffset + size; 871 - if (soffset >= eoffset) { 872 - return -EINVAL; 873 - } 874 - 875 - last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; 876 - if (last_pfn > rdev->vm_manager.max_pfn) { 877 - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", 878 - last_pfn, rdev->vm_manager.max_pfn); 879 - return -EINVAL; 880 - } 881 - 882 - } else { 883 - eoffset = last_pfn = 0; 884 - } 885 - 886 - mutex_lock(&vm->mutex); 887 - head = &vm->va; 888 - last_offset = 0; 889 - list_for_each_entry(tmp, &vm->va, vm_list) { 890 - if (bo_va == tmp) { 891 - /* skip over currently modified bo */ 892 - continue; 893 - } 894 - 895 - if (soffset >= last_offset && eoffset <= tmp->soffset) { 896 - /* bo can be added before this one */ 897 - break; 898 - } 899 - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { 900 - /* bo and tmp overlap, invalid offset */ 901 - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", 902 - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, 903 - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); 904 - mutex_unlock(&vm->mutex); 905 - return -EINVAL; 906 - } 907 - last_offset = tmp->eoffset; 908 - head = &tmp->vm_list; 909 - } 910 - 911 - bo_va->soffset = soffset; 912 - bo_va->eoffset = eoffset; 913 - bo_va->flags = flags; 914 - bo_va->valid = false; 915 - list_move(&bo_va->vm_list, head); 916 - 917 - mutex_unlock(&vm->mutex); 918 - return 0; 919 - } 920 - 921 - /** 922 - * radeon_vm_map_gart - get the physical address of a gart page 923 - * 924 - * @rdev: radeon_device pointer 925 - * 
@addr: the unmapped addr 926 - * 927 - * Look up the physical address of the page that the pte resolves 928 - * to (cayman+). 929 - * Returns the physical address of the page. 930 - */ 931 - uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) 932 - { 933 - uint64_t result; 934 - 935 - /* page table offset */ 936 - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; 937 - 938 - /* in case cpu page size != gpu page size*/ 939 - result |= addr & (~PAGE_MASK); 940 - 941 - return result; 942 - } 943 - 944 - /** 945 - * radeon_vm_page_flags - translate page flags to what the hw uses 946 - * 947 - * @flags: flags comming from userspace 948 - * 949 - * Translate the flags the userspace ABI uses to hw flags. 950 - */ 951 - static uint32_t radeon_vm_page_flags(uint32_t flags) 952 - { 953 - uint32_t hw_flags = 0; 954 - hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; 955 - hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; 956 - hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; 957 - if (flags & RADEON_VM_PAGE_SYSTEM) { 958 - hw_flags |= R600_PTE_SYSTEM; 959 - hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; 960 - } 961 - return hw_flags; 962 - } 963 - 964 - /** 965 - * radeon_vm_update_pdes - make sure that page directory is valid 966 - * 967 - * @rdev: radeon_device pointer 968 - * @vm: requested vm 969 - * @start: start of GPU address range 970 - * @end: end of GPU address range 971 - * 972 - * Allocates new page tables if necessary 973 - * and updates the page directory (cayman+). 974 - * Returns 0 for success, error for failure. 975 - * 976 - * Global and local mutex must be locked! 
977 - */ 978 - static int radeon_vm_update_pdes(struct radeon_device *rdev, 979 - struct radeon_vm *vm, 980 - struct radeon_ib *ib, 981 - uint64_t start, uint64_t end) 982 - { 983 - static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; 984 - 985 - uint64_t last_pde = ~0, last_pt = ~0; 986 - unsigned count = 0; 987 - uint64_t pt_idx; 988 - int r; 989 - 990 - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; 991 - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; 992 - 993 - /* walk over the address space and update the page directory */ 994 - for (pt_idx = start; pt_idx <= end; ++pt_idx) { 995 - uint64_t pde, pt; 996 - 997 - if (vm->page_tables[pt_idx]) 998 - continue; 999 - 1000 - retry: 1001 - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, 1002 - &vm->page_tables[pt_idx], 1003 - RADEON_VM_PTE_COUNT * 8, 1004 - RADEON_GPU_PAGE_SIZE, false); 1005 - 1006 - if (r == -ENOMEM) { 1007 - r = radeon_vm_evict(rdev, vm); 1008 - if (r) 1009 - return r; 1010 - goto retry; 1011 - } else if (r) { 1012 - return r; 1013 - } 1014 - 1015 - pde = vm->pd_gpu_addr + pt_idx * 8; 1016 - 1017 - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); 1018 - 1019 - if (((last_pde + 8 * count) != pde) || 1020 - ((last_pt + incr * count) != pt)) { 1021 - 1022 - if (count) { 1023 - radeon_asic_vm_set_page(rdev, ib, last_pde, 1024 - last_pt, count, incr, 1025 - R600_PTE_VALID); 1026 - 1027 - count *= RADEON_VM_PTE_COUNT; 1028 - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, 1029 - count, 0, 0); 1030 - } 1031 - 1032 - count = 1; 1033 - last_pde = pde; 1034 - last_pt = pt; 1035 - } else { 1036 - ++count; 1037 - } 1038 - } 1039 - 1040 - if (count) { 1041 - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, 1042 - incr, R600_PTE_VALID); 1043 - 1044 - count *= RADEON_VM_PTE_COUNT; 1045 - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, 1046 - count, 0, 0); 1047 - } 1048 - 1049 - return 0; 1050 - } 1051 - 1052 - /** 1053 - * radeon_vm_update_ptes - make sure that 
page tables are valid 1054 - * 1055 - * @rdev: radeon_device pointer 1056 - * @vm: requested vm 1057 - * @start: start of GPU address range 1058 - * @end: end of GPU address range 1059 - * @dst: destination address to map to 1060 - * @flags: mapping flags 1061 - * 1062 - * Update the page tables in the range @start - @end (cayman+). 1063 - * 1064 - * Global and local mutex must be locked! 1065 - */ 1066 - static void radeon_vm_update_ptes(struct radeon_device *rdev, 1067 - struct radeon_vm *vm, 1068 - struct radeon_ib *ib, 1069 - uint64_t start, uint64_t end, 1070 - uint64_t dst, uint32_t flags) 1071 - { 1072 - static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; 1073 - 1074 - uint64_t last_pte = ~0, last_dst = ~0; 1075 - unsigned count = 0; 1076 - uint64_t addr; 1077 - 1078 - start = start / RADEON_GPU_PAGE_SIZE; 1079 - end = end / RADEON_GPU_PAGE_SIZE; 1080 - 1081 - /* walk over the address space and update the page tables */ 1082 - for (addr = start; addr < end; ) { 1083 - uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; 1084 - unsigned nptes; 1085 - uint64_t pte; 1086 - 1087 - if ((addr & ~mask) == (end & ~mask)) 1088 - nptes = end - addr; 1089 - else 1090 - nptes = RADEON_VM_PTE_COUNT - (addr & mask); 1091 - 1092 - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); 1093 - pte += (addr & mask) * 8; 1094 - 1095 - if ((last_pte + 8 * count) != pte) { 1096 - 1097 - if (count) { 1098 - radeon_asic_vm_set_page(rdev, ib, last_pte, 1099 - last_dst, count, 1100 - RADEON_GPU_PAGE_SIZE, 1101 - flags); 1102 - } 1103 - 1104 - count = nptes; 1105 - last_pte = pte; 1106 - last_dst = dst; 1107 - } else { 1108 - count += nptes; 1109 - } 1110 - 1111 - addr += nptes; 1112 - dst += nptes * RADEON_GPU_PAGE_SIZE; 1113 - } 1114 - 1115 - if (count) { 1116 - radeon_asic_vm_set_page(rdev, ib, last_pte, 1117 - last_dst, count, 1118 - RADEON_GPU_PAGE_SIZE, flags); 1119 - } 1120 - } 1121 - 1122 - /** 1123 - * radeon_vm_bo_update - map a bo into the vm page table 1124 - * 1125 - * 
@rdev: radeon_device pointer 1126 - * @vm: requested vm 1127 - * @bo: radeon buffer object 1128 - * @mem: ttm mem 1129 - * 1130 - * Fill in the page table entries for @bo (cayman+). 1131 - * Returns 0 for success, -EINVAL for failure. 1132 - * 1133 - * Object have to be reserved & global and local mutex must be locked! 1134 - */ 1135 - int radeon_vm_bo_update(struct radeon_device *rdev, 1136 - struct radeon_vm *vm, 1137 - struct radeon_bo *bo, 1138 - struct ttm_mem_reg *mem) 1139 - { 1140 - struct radeon_ib ib; 1141 - struct radeon_bo_va *bo_va; 1142 - unsigned nptes, npdes, ndw; 1143 - uint64_t addr; 1144 - int r; 1145 - 1146 - /* nothing to do if vm isn't bound */ 1147 - if (vm->page_directory == NULL) 1148 - return 0; 1149 - 1150 - bo_va = radeon_vm_bo_find(vm, bo); 1151 - if (bo_va == NULL) { 1152 - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); 1153 - return -EINVAL; 1154 - } 1155 - 1156 - if (!bo_va->soffset) { 1157 - dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", 1158 - bo, vm); 1159 - return -EINVAL; 1160 - } 1161 - 1162 - if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) 1163 - return 0; 1164 - 1165 - bo_va->flags &= ~RADEON_VM_PAGE_VALID; 1166 - bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; 1167 - if (mem) { 1168 - addr = mem->start << PAGE_SHIFT; 1169 - if (mem->mem_type != TTM_PL_SYSTEM) { 1170 - bo_va->flags |= RADEON_VM_PAGE_VALID; 1171 - bo_va->valid = true; 1172 - } 1173 - if (mem->mem_type == TTM_PL_TT) { 1174 - bo_va->flags |= RADEON_VM_PAGE_SYSTEM; 1175 - } else { 1176 - addr += rdev->vm_manager.vram_base_offset; 1177 - } 1178 - } else { 1179 - addr = 0; 1180 - bo_va->valid = false; 1181 - } 1182 - 1183 - trace_radeon_vm_bo_update(bo_va); 1184 - 1185 - nptes = radeon_bo_ngpu_pages(bo); 1186 - 1187 - /* assume two extra pdes in case the mapping overlaps the borders */ 1188 - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; 1189 - 1190 - /* padding, etc. 
*/ 1191 - ndw = 64; 1192 - 1193 - if (RADEON_VM_BLOCK_SIZE > 11) 1194 - /* reserve space for one header for every 2k dwords */ 1195 - ndw += (nptes >> 11) * 4; 1196 - else 1197 - /* reserve space for one header for 1198 - every (1 << BLOCK_SIZE) entries */ 1199 - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; 1200 - 1201 - /* reserve space for pte addresses */ 1202 - ndw += nptes * 2; 1203 - 1204 - /* reserve space for one header for every 2k dwords */ 1205 - ndw += (npdes >> 11) * 4; 1206 - 1207 - /* reserve space for pde addresses */ 1208 - ndw += npdes * 2; 1209 - 1210 - /* reserve space for clearing new page tables */ 1211 - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; 1212 - 1213 - /* update too big for an IB */ 1214 - if (ndw > 0xfffff) 1215 - return -ENOMEM; 1216 - 1217 - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); 1218 - if (r) 1219 - return r; 1220 - ib.length_dw = 0; 1221 - 1222 - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); 1223 - if (r) { 1224 - radeon_ib_free(rdev, &ib); 1225 - return r; 1226 - } 1227 - 1228 - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, 1229 - addr, radeon_vm_page_flags(bo_va->flags)); 1230 - 1231 - radeon_semaphore_sync_to(ib.semaphore, vm->fence); 1232 - r = radeon_ib_schedule(rdev, &ib, NULL); 1233 - if (r) { 1234 - radeon_ib_free(rdev, &ib); 1235 - return r; 1236 - } 1237 - radeon_fence_unref(&vm->fence); 1238 - vm->fence = radeon_fence_ref(ib.fence); 1239 - radeon_ib_free(rdev, &ib); 1240 - radeon_fence_unref(&vm->last_flush); 1241 - 1242 - return 0; 1243 - } 1244 - 1245 - /** 1246 - * radeon_vm_bo_rmv - remove a bo to a specific vm 1247 - * 1248 - * @rdev: radeon_device pointer 1249 - * @bo_va: requested bo_va 1250 - * 1251 - * Remove @bo_va->bo from the requested vm (cayman+). 1252 - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and 1253 - * remove the ptes for @bo_va in the page table. 1254 - * Returns 0 for success. 
1255 - * 1256 - * Object have to be reserved! 1257 - */ 1258 - int radeon_vm_bo_rmv(struct radeon_device *rdev, 1259 - struct radeon_bo_va *bo_va) 1260 - { 1261 - int r = 0; 1262 - 1263 - mutex_lock(&rdev->vm_manager.lock); 1264 - mutex_lock(&bo_va->vm->mutex); 1265 - if (bo_va->soffset) { 1266 - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); 1267 - } 1268 - mutex_unlock(&rdev->vm_manager.lock); 1269 - list_del(&bo_va->vm_list); 1270 - mutex_unlock(&bo_va->vm->mutex); 1271 - list_del(&bo_va->bo_list); 1272 - 1273 - kfree(bo_va); 1274 - return r; 1275 - } 1276 - 1277 - /** 1278 - * radeon_vm_bo_invalidate - mark the bo as invalid 1279 - * 1280 - * @rdev: radeon_device pointer 1281 - * @vm: requested vm 1282 - * @bo: radeon buffer object 1283 - * 1284 - * Mark @bo as invalid (cayman+). 1285 - */ 1286 - void radeon_vm_bo_invalidate(struct radeon_device *rdev, 1287 - struct radeon_bo *bo) 1288 - { 1289 - struct radeon_bo_va *bo_va; 1290 - 1291 - list_for_each_entry(bo_va, &bo->va, bo_list) { 1292 - bo_va->valid = false; 1293 - } 1294 - } 1295 - 1296 - /** 1297 - * radeon_vm_init - initialize a vm instance 1298 - * 1299 - * @rdev: radeon_device pointer 1300 - * @vm: requested vm 1301 - * 1302 - * Init @vm fields (cayman+). 1303 - */ 1304 - void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) 1305 - { 1306 - vm->id = 0; 1307 - vm->fence = NULL; 1308 - vm->last_flush = NULL; 1309 - vm->last_id_use = NULL; 1310 - mutex_init(&vm->mutex); 1311 - INIT_LIST_HEAD(&vm->list); 1312 - INIT_LIST_HEAD(&vm->va); 1313 - } 1314 - 1315 - /** 1316 - * radeon_vm_fini - tear down a vm instance 1317 - * 1318 - * @rdev: radeon_device pointer 1319 - * @vm: requested vm 1320 - * 1321 - * Tear down @vm (cayman+). 
1322 - * Unbind the VM and remove all bos from the vm bo list 1323 - */ 1324 - void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) 1325 - { 1326 - struct radeon_bo_va *bo_va, *tmp; 1327 - int r; 1328 - 1329 - mutex_lock(&rdev->vm_manager.lock); 1330 - mutex_lock(&vm->mutex); 1331 - radeon_vm_free_pt(rdev, vm); 1332 - mutex_unlock(&rdev->vm_manager.lock); 1333 - 1334 - if (!list_empty(&vm->va)) { 1335 - dev_err(rdev->dev, "still active bo inside vm\n"); 1336 - } 1337 - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { 1338 - list_del_init(&bo_va->vm_list); 1339 - r = radeon_bo_reserve(bo_va->bo, false); 1340 - if (!r) { 1341 - list_del_init(&bo_va->bo_list); 1342 - radeon_bo_unreserve(bo_va->bo); 1343 - kfree(bo_va); 1344 - } 1345 - } 1346 - radeon_fence_unref(&vm->fence); 1347 - radeon_fence_unref(&vm->last_flush); 1348 - radeon_fence_unref(&vm->last_id_use); 1349 - mutex_unlock(&vm->mutex); 1350 396 }
+37 -12
drivers/gpu/drm/radeon/radeon_gem.c
··· 344 344 } 345 345 robj = gem_to_radeon_bo(gobj); 346 346 r = radeon_bo_wait(robj, &cur_placement, true); 347 - switch (cur_placement) { 348 - case TTM_PL_VRAM: 349 - args->domain = RADEON_GEM_DOMAIN_VRAM; 350 - break; 351 - case TTM_PL_TT: 352 - args->domain = RADEON_GEM_DOMAIN_GTT; 353 - break; 354 - case TTM_PL_SYSTEM: 355 - args->domain = RADEON_GEM_DOMAIN_CPU; 356 - default: 357 - break; 358 - } 347 + args->domain = radeon_mem_type_to_domain(cur_placement); 359 348 drm_gem_object_unreference_unlocked(gobj); 360 349 r = radeon_gem_handle_lockup(rdev, r); 361 350 return r; ··· 518 529 } 519 530 out: 520 531 radeon_bo_unreserve(rbo); 532 + drm_gem_object_unreference_unlocked(gobj); 533 + return r; 534 + } 535 + 536 + int radeon_gem_op_ioctl(struct drm_device *dev, void *data, 537 + struct drm_file *filp) 538 + { 539 + struct drm_radeon_gem_op *args = data; 540 + struct drm_gem_object *gobj; 541 + struct radeon_bo *robj; 542 + int r; 543 + 544 + gobj = drm_gem_object_lookup(dev, filp, args->handle); 545 + if (gobj == NULL) { 546 + return -ENOENT; 547 + } 548 + robj = gem_to_radeon_bo(gobj); 549 + r = radeon_bo_reserve(robj, false); 550 + if (unlikely(r)) 551 + goto out; 552 + 553 + switch (args->op) { 554 + case RADEON_GEM_OP_GET_INITIAL_DOMAIN: 555 + args->value = robj->initial_domain; 556 + break; 557 + case RADEON_GEM_OP_SET_INITIAL_DOMAIN: 558 + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | 559 + RADEON_GEM_DOMAIN_GTT | 560 + RADEON_GEM_DOMAIN_CPU); 561 + break; 562 + default: 563 + r = -EINVAL; 564 + } 565 + 566 + radeon_bo_unreserve(robj); 567 + out: 521 568 drm_gem_object_unreference_unlocked(gobj); 522 569 return r; 523 570 }
+25 -1
drivers/gpu/drm/radeon/radeon_kms.c
··· 486 486 case RADEON_INFO_VCE_FB_VERSION: 487 487 *value = rdev->vce.fb_version; 488 488 break; 489 + case RADEON_INFO_NUM_BYTES_MOVED: 490 + value = (uint32_t*)&value64; 491 + value_size = sizeof(uint64_t); 492 + value64 = atomic64_read(&rdev->num_bytes_moved); 493 + break; 494 + case RADEON_INFO_VRAM_USAGE: 495 + value = (uint32_t*)&value64; 496 + value_size = sizeof(uint64_t); 497 + value64 = atomic64_read(&rdev->vram_usage); 498 + break; 499 + case RADEON_INFO_GTT_USAGE: 500 + value = (uint32_t*)&value64; 501 + value_size = sizeof(uint64_t); 502 + value64 = atomic64_read(&rdev->gtt_usage); 503 + break; 489 504 default: 490 505 DRM_DEBUG_KMS("Invalid request %d\n", info->request); 491 506 return -EINVAL; ··· 559 544 return -ENOMEM; 560 545 } 561 546 562 - radeon_vm_init(rdev, &fpriv->vm); 547 + r = radeon_vm_init(rdev, &fpriv->vm); 548 + if (r) 549 + return r; 550 + 551 + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); 552 + if (r) 553 + return r; 563 554 564 555 /* map the ib pool buffer read only into 565 556 * virtual address space */ ··· 574 553 r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, 575 554 RADEON_VM_PAGE_READABLE | 576 555 RADEON_VM_PAGE_SNOOPED); 556 + 557 + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); 577 558 if (r) { 578 559 radeon_vm_fini(rdev, &fpriv->vm); 579 560 kfree(fpriv); ··· 837 814 DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 838 815 DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 839 816 DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 817 + DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 840 818 }; 841 819 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
+119 -16
drivers/gpu/drm/radeon/radeon_object.c
··· 56 56 } 57 57 } 58 58 59 + static void radeon_update_memory_usage(struct radeon_bo *bo, 60 + unsigned mem_type, int sign) 61 + { 62 + struct radeon_device *rdev = bo->rdev; 63 + u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT; 64 + 65 + switch (mem_type) { 66 + case TTM_PL_TT: 67 + if (sign > 0) 68 + atomic64_add(size, &rdev->gtt_usage); 69 + else 70 + atomic64_sub(size, &rdev->gtt_usage); 71 + break; 72 + case TTM_PL_VRAM: 73 + if (sign > 0) 74 + atomic64_add(size, &rdev->vram_usage); 75 + else 76 + atomic64_sub(size, &rdev->vram_usage); 77 + break; 78 + } 79 + } 80 + 59 81 static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) 60 82 { 61 83 struct radeon_bo *bo; 62 84 63 85 bo = container_of(tbo, struct radeon_bo, tbo); 86 + 87 + radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); 88 + 64 89 mutex_lock(&bo->rdev->gem.mutex); 65 90 list_del_init(&bo->list); 66 91 mutex_unlock(&bo->rdev->gem.mutex); ··· 170 145 bo->surface_reg = -1; 171 146 INIT_LIST_HEAD(&bo->list); 172 147 INIT_LIST_HEAD(&bo->va); 148 + bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | 149 + RADEON_GEM_DOMAIN_GTT | 150 + RADEON_GEM_DOMAIN_CPU); 173 151 radeon_ttm_placement_from_domain(bo, domain); 174 152 /* Kernel allocation are uninterruptible */ 175 153 down_read(&rdev->pm.mclk_lock); ··· 366 338 arch_phys_wc_del(rdev->mc.vram_mtrr); 367 339 } 368 340 369 - void radeon_bo_list_add_object(struct radeon_bo_list *lobj, 370 - struct list_head *head) 341 + /* Returns how many bytes TTM can move per IB. 342 + */ 343 + static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev) 371 344 { 372 - if (lobj->written) { 373 - list_add(&lobj->tv.head, head); 374 - } else { 375 - list_add_tail(&lobj->tv.head, head); 376 - } 345 + u64 real_vram_size = rdev->mc.real_vram_size; 346 + u64 vram_usage = atomic64_read(&rdev->vram_usage); 347 + 348 + /* This function is based on the current VRAM usage. 
349 + * 350 + * - If all of VRAM is free, allow relocating the number of bytes that 351 + * is equal to 1/4 of the size of VRAM for this IB. 352 + 353 + * - If more than one half of VRAM is occupied, only allow relocating 354 + * 1 MB of data for this IB. 355 + * 356 + * - From 0 to one half of used VRAM, the threshold decreases 357 + * linearly. 358 + * __________________ 359 + * 1/4 of -|\ | 360 + * VRAM | \ | 361 + * | \ | 362 + * | \ | 363 + * | \ | 364 + * | \ | 365 + * | \ | 366 + * | \________|1 MB 367 + * |----------------| 368 + * VRAM 0 % 100 % 369 + * used used 370 + * 371 + * Note: It's a threshold, not a limit. The threshold must be crossed 372 + * for buffer relocations to stop, so any buffer of an arbitrary size 373 + * can be moved as long as the threshold isn't crossed before 374 + * the relocation takes place. We don't want to disable buffer 375 + * relocations completely. 376 + * 377 + * The idea is that buffers should be placed in VRAM at creation time 378 + * and TTM should only do a minimum number of relocations during 379 + * command submission. In practice, you need to submit at least 380 + * a dozen IBs to move all buffers to VRAM if they are in GTT. 381 + * 382 + * Also, things can get pretty crazy under memory pressure and actual 383 + * VRAM usage can change a lot, so playing safe even at 50% does 384 + * consistently increase performance. 385 + */ 386 + 387 + u64 half_vram = real_vram_size >> 1; 388 + u64 half_free_vram = vram_usage >= half_vram ? 
0 : half_vram - vram_usage; 389 + u64 bytes_moved_threshold = half_free_vram >> 1; 390 + return max(bytes_moved_threshold, 1024*1024ull); 377 391 } 378 392 379 - int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, 393 + int radeon_bo_list_validate(struct radeon_device *rdev, 394 + struct ww_acquire_ctx *ticket, 380 395 struct list_head *head, int ring) 381 396 { 382 - struct radeon_bo_list *lobj; 397 + struct radeon_cs_reloc *lobj; 383 398 struct radeon_bo *bo; 384 - u32 domain; 385 399 int r; 400 + u64 bytes_moved = 0, initial_bytes_moved; 401 + u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); 386 402 387 403 r = ttm_eu_reserve_buffers(ticket, head); 388 404 if (unlikely(r != 0)) { 389 405 return r; 390 406 } 407 + 391 408 list_for_each_entry(lobj, head, tv.head) { 392 - bo = lobj->bo; 409 + bo = lobj->robj; 393 410 if (!bo->pin_count) { 394 - domain = lobj->domain; 395 - 411 + u32 domain = lobj->domain; 412 + u32 current_domain = 413 + radeon_mem_type_to_domain(bo->tbo.mem.mem_type); 414 + 415 + /* Check if this buffer will be moved and don't move it 416 + * if we have moved too many buffers for this IB already. 417 + * 418 + * Note that this allows moving at least one buffer of 419 + * any size, because it doesn't take the current "bo" 420 + * into account. We don't want to disallow buffer moves 421 + * completely. 
422 + */ 423 + if (current_domain != RADEON_GEM_DOMAIN_CPU && 424 + (domain & current_domain) == 0 && /* will be moved */ 425 + bytes_moved > bytes_moved_threshold) { 426 + /* don't move it */ 427 + domain = current_domain; 428 + } 429 + 396 430 retry: 397 431 radeon_ttm_placement_from_domain(bo, domain); 398 432 if (ring == R600_RING_TYPE_UVD_INDEX) 399 433 radeon_uvd_force_into_uvd_segment(bo); 400 - r = ttm_bo_validate(&bo->tbo, &bo->placement, 401 - true, false); 434 + 435 + initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); 436 + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 437 + bytes_moved += atomic64_read(&rdev->num_bytes_moved) - 438 + initial_bytes_moved; 439 + 402 440 if (unlikely(r)) { 403 441 if (r != -ERESTARTSYS && domain != lobj->alt_domain) { 404 442 domain = lobj->alt_domain; ··· 658 564 } 659 565 660 566 void radeon_bo_move_notify(struct ttm_buffer_object *bo, 661 - struct ttm_mem_reg *mem) 567 + struct ttm_mem_reg *new_mem) 662 568 { 663 569 struct radeon_bo *rbo; 570 + 664 571 if (!radeon_ttm_bo_is_radeon_bo(bo)) 665 572 return; 573 + 666 574 rbo = container_of(bo, struct radeon_bo, tbo); 667 575 radeon_bo_check_tiling(rbo, 0, 1); 668 576 radeon_vm_bo_invalidate(rbo->rdev, rbo); 577 + 578 + /* update statistics */ 579 + if (!new_mem) 580 + return; 581 + 582 + radeon_update_memory_usage(rbo, bo->mem.mem_type, -1); 583 + radeon_update_memory_usage(rbo, new_mem->mem_type, 1); 669 584 } 670 585 671 586 int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+4 -5
drivers/gpu/drm/radeon/radeon_object.h
··· 138 138 extern void radeon_bo_force_delete(struct radeon_device *rdev); 139 139 extern int radeon_bo_init(struct radeon_device *rdev); 140 140 extern void radeon_bo_fini(struct radeon_device *rdev); 141 - extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, 142 - struct list_head *head); 143 - extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, 141 + extern int radeon_bo_list_validate(struct radeon_device *rdev, 142 + struct ww_acquire_ctx *ticket, 144 143 struct list_head *head, int ring); 145 144 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, 146 145 struct vm_area_struct *vma); ··· 150 151 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, 151 152 bool force_drop); 152 153 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, 153 - struct ttm_mem_reg *mem); 154 + struct ttm_mem_reg *new_mem); 154 155 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); 155 156 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); 156 157 ··· 180 181 extern int radeon_sa_bo_new(struct radeon_device *rdev, 181 182 struct radeon_sa_manager *sa_manager, 182 183 struct radeon_sa_bo **sa_bo, 183 - unsigned size, unsigned align, bool block); 184 + unsigned size, unsigned align); 184 185 extern void radeon_sa_bo_free(struct radeon_device *rdev, 185 186 struct radeon_sa_bo **sa_bo, 186 187 struct radeon_fence *fence);
+2 -5
drivers/gpu/drm/radeon/radeon_pm.c
··· 260 260 if (!ring->ready) { 261 261 continue; 262 262 } 263 - r = radeon_fence_wait_empty_locked(rdev, i); 263 + r = radeon_fence_wait_empty(rdev, i); 264 264 if (r) { 265 265 /* needs a GPU reset dont reset here */ 266 266 mutex_unlock(&rdev->ring_lock); ··· 896 896 for (i = 0; i < RADEON_NUM_RINGS; i++) { 897 897 struct radeon_ring *ring = &rdev->ring[i]; 898 898 if (ring->ready) 899 - radeon_fence_wait_empty_locked(rdev, i); 899 + radeon_fence_wait_empty(rdev, i); 900 900 } 901 901 902 902 /* program the new power state */ ··· 943 943 if (enable) { 944 944 mutex_lock(&rdev->pm.mutex); 945 945 rdev->pm.dpm.uvd_active = true; 946 - /* disable this for now */ 947 - #if 0 948 946 if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) 949 947 dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; 950 948 else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) ··· 952 954 else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) 953 955 dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; 954 956 else 955 - #endif 956 957 dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; 957 958 rdev->pm.dpm.state = dpm_state; 958 959 mutex_unlock(&rdev->pm.mutex);
+26 -24
drivers/gpu/drm/radeon/radeon_ring.c
··· 63 63 { 64 64 int r; 65 65 66 - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); 66 + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256); 67 67 if (r) { 68 68 dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); 69 69 return r; ··· 145 145 return r; 146 146 } 147 147 148 + /* grab a vm id if necessary */ 149 + if (ib->vm) { 150 + struct radeon_fence *vm_id_fence; 151 + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); 152 + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); 153 + } 154 + 148 155 /* sync with other rings */ 149 156 r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); 150 157 if (r) { ··· 160 153 return r; 161 154 } 162 155 163 - /* if we can't remember our last VM flush then flush now! */ 164 - /* XXX figure out why we have to flush for every IB */ 165 - if (ib->vm /*&& !ib->vm->last_flush*/) { 166 - radeon_ring_vm_flush(rdev, ib->ring, ib->vm); 167 - } 156 + if (ib->vm) 157 + radeon_vm_flush(rdev, ib->vm, ib->ring); 158 + 168 159 if (const_ib) { 169 160 radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); 170 161 radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); ··· 177 172 if (const_ib) { 178 173 const_ib->fence = radeon_fence_ref(ib->fence); 179 174 } 180 - /* we just flushed the VM, remember that */ 181 - if (ib->vm && !ib->vm->last_flush) { 182 - ib->vm->last_flush = radeon_fence_ref(ib->fence); 183 - } 175 + 176 + if (ib->vm) 177 + radeon_vm_fence(rdev, ib->vm, ib->fence); 178 + 184 179 radeon_ring_unlock_commit(rdev, ring); 185 180 return 0; 186 181 } ··· 387 382 if (ndw < ring->ring_free_dw) { 388 383 break; 389 384 } 390 - r = radeon_fence_wait_next_locked(rdev, ring->idx); 385 + r = radeon_fence_wait_next(rdev, ring->idx); 391 386 if (r) 392 387 return r; 393 388 } ··· 490 485 void radeon_ring_lockup_update(struct radeon_device *rdev, 491 486 struct radeon_ring *ring) 492 487 { 493 - ring->last_rptr = radeon_ring_get_rptr(rdev, ring); 494 - 
ring->last_activity = jiffies; 488 + atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring)); 489 + atomic64_set(&ring->last_activity, jiffies_64); 495 490 } 496 491 497 492 /** ··· 503 498 bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 504 499 { 505 500 uint32_t rptr = radeon_ring_get_rptr(rdev, ring); 506 - unsigned long cjiffies, elapsed; 501 + uint64_t last = atomic64_read(&ring->last_activity); 502 + uint64_t elapsed; 507 503 508 - cjiffies = jiffies; 509 - if (!time_after(cjiffies, ring->last_activity)) { 510 - /* likely a wrap around */ 504 + if (rptr != atomic_read(&ring->last_rptr)) { 505 + /* ring is still working, no lockup */ 511 506 radeon_ring_lockup_update(rdev, ring); 512 507 return false; 513 508 } 514 - if (rptr != ring->last_rptr) { 515 - /* CP is still working no lockup */ 516 - radeon_ring_lockup_update(rdev, ring); 517 - return false; 518 - } 519 - elapsed = jiffies_to_msecs(cjiffies - ring->last_activity); 509 + 510 + elapsed = jiffies_to_msecs(jiffies_64 - last); 520 511 if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) { 521 - dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed); 512 + dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n", 513 + ring->idx, elapsed); 522 514 return true; 523 515 } 524 516 /* give a chance to the GPU ... */
+2 -5
drivers/gpu/drm/radeon/radeon_sa.c
··· 312 312 int radeon_sa_bo_new(struct radeon_device *rdev, 313 313 struct radeon_sa_manager *sa_manager, 314 314 struct radeon_sa_bo **sa_bo, 315 - unsigned size, unsigned align, bool block) 315 + unsigned size, unsigned align) 316 316 { 317 317 struct radeon_fence *fences[RADEON_NUM_RINGS]; 318 318 unsigned tries[RADEON_NUM_RINGS]; ··· 353 353 r = radeon_fence_wait_any(rdev, fences, false); 354 354 spin_lock(&sa_manager->wq.lock); 355 355 /* if we have nothing to wait for block */ 356 - if (r == -ENOENT && block) { 356 + if (r == -ENOENT) { 357 357 r = wait_event_interruptible_locked( 358 358 sa_manager->wq, 359 359 radeon_sa_event(sa_manager, size, align) 360 360 ); 361 - 362 - } else if (r == -ENOENT) { 363 - r = -ENOMEM; 364 361 } 365 362 366 363 } while (!r);
+10 -4
drivers/gpu/drm/radeon/radeon_semaphore.c
··· 42 42 return -ENOMEM; 43 43 } 44 44 r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, 45 - 8 * RADEON_NUM_SYNCS, 8, true); 45 + 8 * RADEON_NUM_SYNCS, 8); 46 46 if (r) { 47 47 kfree(*semaphore); 48 48 *semaphore = NULL; ··· 147 147 148 148 if (++count > RADEON_NUM_SYNCS) { 149 149 /* not enough room, wait manually */ 150 - radeon_fence_wait_locked(fence); 150 + r = radeon_fence_wait(fence, false); 151 + if (r) 152 + return r; 151 153 continue; 152 154 } 153 155 ··· 163 161 if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { 164 162 /* signaling wasn't successful wait manually */ 165 163 radeon_ring_undo(&rdev->ring[i]); 166 - radeon_fence_wait_locked(fence); 164 + r = radeon_fence_wait(fence, false); 165 + if (r) 166 + return r; 167 167 continue; 168 168 } 169 169 ··· 173 169 if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { 174 170 /* waiting wasn't successful wait manually */ 175 171 radeon_ring_undo(&rdev->ring[i]); 176 - radeon_fence_wait_locked(fence); 172 + r = radeon_fence_wait(fence, false); 173 + if (r) 174 + return r; 177 175 continue; 178 176 } 179 177
+7 -1
drivers/gpu/drm/radeon/radeon_ttm.c
··· 406 406 if (r) { 407 407 memcpy: 408 408 r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); 409 + if (r) { 410 + return r; 411 + } 409 412 } 410 - return r; 413 + 414 + /* update statistics */ 415 + atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved); 416 + return 0; 411 417 } 412 418 413 419 static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+2 -3
drivers/gpu/drm/radeon/radeon_uvd.c
··· 453 453 } 454 454 455 455 reloc = p->relocs_ptr[(idx / 4)]; 456 - start = reloc->lobj.gpu_offset; 456 + start = reloc->gpu_offset; 457 457 end = start + radeon_bo_size(reloc->robj); 458 458 start += offset; 459 459 ··· 805 805 (rdev->pm.dpm.hd != hd)) { 806 806 rdev->pm.dpm.sd = sd; 807 807 rdev->pm.dpm.hd = hd; 808 - /* disable this for now */ 809 - /*streams_changed = true;*/ 808 + streams_changed = true; 810 809 } 811 810 } 812 811
+35 -30
drivers/gpu/drm/radeon/radeon_vce.c
··· 119 119 if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) 120 120 return -EINVAL; 121 121 122 - /* load firmware into VRAM */ 122 + /* allocate firmware, stack and heap BO */ 123 123 124 124 size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + 125 125 RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; ··· 130 130 return r; 131 131 } 132 132 133 - r = radeon_vce_resume(rdev); 134 - if (r) 133 + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); 134 + if (r) { 135 + radeon_bo_unref(&rdev->vce.vcpu_bo); 136 + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); 135 137 return r; 138 + } 136 139 137 - memset(rdev->vce.cpu_addr, 0, size); 138 - memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); 139 - 140 - r = radeon_vce_suspend(rdev); 141 - if (r) 140 + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 141 + &rdev->vce.gpu_addr); 142 + radeon_bo_unreserve(rdev->vce.vcpu_bo); 143 + if (r) { 144 + radeon_bo_unref(&rdev->vce.vcpu_bo); 145 + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); 142 146 return r; 147 + } 143 148 144 149 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { 145 150 atomic_set(&rdev->vce.handles[i], 0); ··· 163 158 */ 164 159 void radeon_vce_fini(struct radeon_device *rdev) 165 160 { 166 - radeon_vce_suspend(rdev); 161 + if (rdev->vce.vcpu_bo == NULL) 162 + return; 163 + 167 164 radeon_bo_unref(&rdev->vce.vcpu_bo); 165 + 166 + release_firmware(rdev->vce_fw); 168 167 } 169 168 170 169 /** ··· 176 167 * 177 168 * @rdev: radeon_device pointer 178 169 * 179 - * TODO: Test VCE suspend/resume 180 170 */ 181 171 int radeon_vce_suspend(struct radeon_device *rdev) 182 172 { 183 - int r; 173 + int i; 184 174 185 175 if (rdev->vce.vcpu_bo == NULL) 186 176 return 0; 187 177 188 - r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); 189 - if (!r) { 190 - radeon_bo_kunmap(rdev->vce.vcpu_bo); 191 - radeon_bo_unpin(rdev->vce.vcpu_bo); 192 - radeon_bo_unreserve(rdev->vce.vcpu_bo); 193 - } 194 - return r; 178 + for (i = 0; i < 
RADEON_MAX_VCE_HANDLES; ++i) 179 + if (atomic_read(&rdev->vce.handles[i])) 180 + break; 181 + 182 + if (i == RADEON_MAX_VCE_HANDLES) 183 + return 0; 184 + 185 + /* TODO: suspending running encoding sessions isn't supported */ 186 + return -EINVAL; 195 187 } 196 188 197 189 /** ··· 200 190 * 201 191 * @rdev: radeon_device pointer 202 192 * 203 - * TODO: Test VCE suspend/resume 204 193 */ 205 194 int radeon_vce_resume(struct radeon_device *rdev) 206 195 { 196 + void *cpu_addr; 207 197 int r; 208 198 209 199 if (rdev->vce.vcpu_bo == NULL) ··· 211 201 212 202 r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); 213 203 if (r) { 214 - radeon_bo_unref(&rdev->vce.vcpu_bo); 215 204 dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); 216 205 return r; 217 206 } 218 207 219 - r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 220 - &rdev->vce.gpu_addr); 208 + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr); 221 209 if (r) { 222 210 radeon_bo_unreserve(rdev->vce.vcpu_bo); 223 - radeon_bo_unref(&rdev->vce.vcpu_bo); 224 - dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); 225 - return r; 226 - } 227 - 228 - r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); 229 - if (r) { 230 211 dev_err(rdev->dev, "(%d) VCE map failed\n", r); 231 212 return r; 232 213 } 214 + 215 + memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); 216 + 217 + radeon_bo_kunmap(rdev->vce.vcpu_bo); 233 218 234 219 radeon_bo_unreserve(rdev->vce.vcpu_bo); 235 220 ··· 461 456 return -EINVAL; 462 457 } 463 458 464 - offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; 459 + offset += p->relocs_ptr[(idx / 4)]->gpu_offset; 465 460 466 461 p->ib.ptr[lo] = offset & 0xFFFFFFFF; 467 462 p->ib.ptr[hi] = offset >> 32;
+966
drivers/gpu/drm/radeon/radeon_vm.c
··· 1 + /* 2 + * Copyright 2008 Advanced Micro Devices, Inc. 3 + * Copyright 2008 Red Hat Inc. 4 + * Copyright 2009 Jerome Glisse. 5 + * 6 + * Permission is hereby granted, free of charge, to any person obtaining a 7 + * copy of this software and associated documentation files (the "Software"), 8 + * to deal in the Software without restriction, including without limitation 9 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 + * and/or sell copies of the Software, and to permit persons to whom the 11 + * Software is furnished to do so, subject to the following conditions: 12 + * 13 + * The above copyright notice and this permission notice shall be included in 14 + * all copies or substantial portions of the Software. 15 + * 16 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 + * OTHER DEALINGS IN THE SOFTWARE. 23 + * 24 + * Authors: Dave Airlie 25 + * Alex Deucher 26 + * Jerome Glisse 27 + */ 28 + #include <drm/drmP.h> 29 + #include <drm/radeon_drm.h> 30 + #include "radeon.h" 31 + #include "radeon_trace.h" 32 + 33 + /* 34 + * GPUVM 35 + * GPUVM is similar to the legacy gart on older asics, however 36 + * rather than there being a single global gart table 37 + * for the entire GPU, there are multiple VM page tables active 38 + * at any given time. The VM page tables can contain a mix 39 + * vram pages and system memory pages and system memory pages 40 + * can be mapped as snooped (cached system pages) or unsnooped 41 + * (uncached system pages). 
42 + * Each VM has an ID associated with it and there is a page table 43 + * associated with each VMID. When execting a command buffer, 44 + * the kernel tells the the ring what VMID to use for that command 45 + * buffer. VMIDs are allocated dynamically as commands are submitted. 46 + * The userspace drivers maintain their own address space and the kernel 47 + * sets up their pages tables accordingly when they submit their 48 + * command buffers and a VMID is assigned. 49 + * Cayman/Trinity support up to 8 active VMs at any given time; 50 + * SI supports 16. 51 + */ 52 + 53 + /** 54 + * radeon_vm_num_pde - return the number of page directory entries 55 + * 56 + * @rdev: radeon_device pointer 57 + * 58 + * Calculate the number of page directory entries (cayman+). 59 + */ 60 + static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) 61 + { 62 + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; 63 + } 64 + 65 + /** 66 + * radeon_vm_directory_size - returns the size of the page directory in bytes 67 + * 68 + * @rdev: radeon_device pointer 69 + * 70 + * Calculate the size of the page directory in bytes (cayman+). 71 + */ 72 + static unsigned radeon_vm_directory_size(struct radeon_device *rdev) 73 + { 74 + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); 75 + } 76 + 77 + /** 78 + * radeon_vm_manager_init - init the vm manager 79 + * 80 + * @rdev: radeon_device pointer 81 + * 82 + * Init the vm manager (cayman+). 83 + * Returns 0 for success, error for failure. 84 + */ 85 + int radeon_vm_manager_init(struct radeon_device *rdev) 86 + { 87 + int r; 88 + 89 + if (!rdev->vm_manager.enabled) { 90 + r = radeon_asic_vm_init(rdev); 91 + if (r) 92 + return r; 93 + 94 + rdev->vm_manager.enabled = true; 95 + } 96 + return 0; 97 + } 98 + 99 + /** 100 + * radeon_vm_manager_fini - tear down the vm manager 101 + * 102 + * @rdev: radeon_device pointer 103 + * 104 + * Tear down the VM manager (cayman+). 
105 + */ 106 + void radeon_vm_manager_fini(struct radeon_device *rdev) 107 + { 108 + int i; 109 + 110 + if (!rdev->vm_manager.enabled) 111 + return; 112 + 113 + for (i = 0; i < RADEON_NUM_VM; ++i) 114 + radeon_fence_unref(&rdev->vm_manager.active[i]); 115 + radeon_asic_vm_fini(rdev); 116 + rdev->vm_manager.enabled = false; 117 + } 118 + 119 + /** 120 + * radeon_vm_get_bos - add the vm BOs to a validation list 121 + * 122 + * @vm: vm providing the BOs 123 + * @head: head of validation list 124 + * 125 + * Add the page directory to the list of BOs to 126 + * validate for command submission (cayman+). 127 + */ 128 + struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, 129 + struct radeon_vm *vm, 130 + struct list_head *head) 131 + { 132 + struct radeon_cs_reloc *list; 133 + unsigned i, idx, size; 134 + 135 + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_cs_reloc); 136 + list = kmalloc(size, GFP_KERNEL); 137 + if (!list) 138 + return NULL; 139 + 140 + /* add the vm page table to the list */ 141 + list[0].gobj = NULL; 142 + list[0].robj = vm->page_directory; 143 + list[0].domain = RADEON_GEM_DOMAIN_VRAM; 144 + list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; 145 + list[0].tv.bo = &vm->page_directory->tbo; 146 + list[0].tiling_flags = 0; 147 + list[0].handle = 0; 148 + list_add(&list[0].tv.head, head); 149 + 150 + for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { 151 + if (!vm->page_tables[i].bo) 152 + continue; 153 + 154 + list[idx].gobj = NULL; 155 + list[idx].robj = vm->page_tables[i].bo; 156 + list[idx].domain = RADEON_GEM_DOMAIN_VRAM; 157 + list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; 158 + list[idx].tv.bo = &list[idx].robj->tbo; 159 + list[idx].tiling_flags = 0; 160 + list[idx].handle = 0; 161 + list_add(&list[idx++].tv.head, head); 162 + } 163 + 164 + return list; 165 + } 166 + 167 + /** 168 + * radeon_vm_grab_id - allocate the next free VMID 169 + * 170 + * @rdev: radeon_device pointer 171 + * @vm: vm to allocate id for 172 + * 
@ring: ring we want to submit job to 173 + * 174 + * Allocate an id for the vm (cayman+). 175 + * Returns the fence we need to sync to (if any). 176 + * 177 + * Global and local mutex must be locked! 178 + */ 179 + struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, 180 + struct radeon_vm *vm, int ring) 181 + { 182 + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; 183 + unsigned choices[2] = {}; 184 + unsigned i; 185 + 186 + /* check if the id is still valid */ 187 + if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) 188 + return NULL; 189 + 190 + /* we definately need to flush */ 191 + radeon_fence_unref(&vm->last_flush); 192 + 193 + /* skip over VMID 0, since it is the system VM */ 194 + for (i = 1; i < rdev->vm_manager.nvm; ++i) { 195 + struct radeon_fence *fence = rdev->vm_manager.active[i]; 196 + 197 + if (fence == NULL) { 198 + /* found a free one */ 199 + vm->id = i; 200 + trace_radeon_vm_grab_id(vm->id, ring); 201 + return NULL; 202 + } 203 + 204 + if (radeon_fence_is_earlier(fence, best[fence->ring])) { 205 + best[fence->ring] = fence; 206 + choices[fence->ring == ring ? 0 : 1] = i; 207 + } 208 + } 209 + 210 + for (i = 0; i < 2; ++i) { 211 + if (choices[i]) { 212 + vm->id = choices[i]; 213 + trace_radeon_vm_grab_id(vm->id, ring); 214 + return rdev->vm_manager.active[choices[i]]; 215 + } 216 + } 217 + 218 + /* should never happen */ 219 + BUG(); 220 + return NULL; 221 + } 222 + 223 + /** 224 + * radeon_vm_flush - hardware flush the vm 225 + * 226 + * @rdev: radeon_device pointer 227 + * @vm: vm we want to flush 228 + * @ring: ring to use for flush 229 + * 230 + * Flush the vm (cayman+). 231 + * 232 + * Global and local mutex must be locked! 233 + */ 234 + void radeon_vm_flush(struct radeon_device *rdev, 235 + struct radeon_vm *vm, 236 + int ring) 237 + { 238 + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); 239 + 240 + /* if we can't remember our last VM flush then flush now! 
*/ 241 + /* XXX figure out why we have to flush all the time */ 242 + if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { 243 + vm->pd_gpu_addr = pd_addr; 244 + radeon_ring_vm_flush(rdev, ring, vm); 245 + } 246 + } 247 + 248 + /** 249 + * radeon_vm_fence - remember fence for vm 250 + * 251 + * @rdev: radeon_device pointer 252 + * @vm: vm we want to fence 253 + * @fence: fence to remember 254 + * 255 + * Fence the vm (cayman+). 256 + * Set the fence used to protect page table and id. 257 + * 258 + * Global and local mutex must be locked! 259 + */ 260 + void radeon_vm_fence(struct radeon_device *rdev, 261 + struct radeon_vm *vm, 262 + struct radeon_fence *fence) 263 + { 264 + radeon_fence_unref(&vm->fence); 265 + vm->fence = radeon_fence_ref(fence); 266 + 267 + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); 268 + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); 269 + 270 + radeon_fence_unref(&vm->last_id_use); 271 + vm->last_id_use = radeon_fence_ref(fence); 272 + 273 + /* we just flushed the VM, remember that */ 274 + if (!vm->last_flush) 275 + vm->last_flush = radeon_fence_ref(fence); 276 + } 277 + 278 + /** 279 + * radeon_vm_bo_find - find the bo_va for a specific vm & bo 280 + * 281 + * @vm: requested vm 282 + * @bo: requested buffer object 283 + * 284 + * Find @bo inside the requested vm (cayman+). 285 + * Search inside the @bos vm list for the requested vm 286 + * Returns the found bo_va or NULL if none is found 287 + * 288 + * Object has to be reserved! 
289 + */ 290 + struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, 291 + struct radeon_bo *bo) 292 + { 293 + struct radeon_bo_va *bo_va; 294 + 295 + list_for_each_entry(bo_va, &bo->va, bo_list) { 296 + if (bo_va->vm == vm) { 297 + return bo_va; 298 + } 299 + } 300 + return NULL; 301 + } 302 + 303 + /** 304 + * radeon_vm_bo_add - add a bo to a specific vm 305 + * 306 + * @rdev: radeon_device pointer 307 + * @vm: requested vm 308 + * @bo: radeon buffer object 309 + * 310 + * Add @bo into the requested vm (cayman+). 311 + * Add @bo to the list of bos associated with the vm 312 + * Returns newly added bo_va or NULL for failure 313 + * 314 + * Object has to be reserved! 315 + */ 316 + struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, 317 + struct radeon_vm *vm, 318 + struct radeon_bo *bo) 319 + { 320 + struct radeon_bo_va *bo_va; 321 + 322 + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 323 + if (bo_va == NULL) { 324 + return NULL; 325 + } 326 + bo_va->vm = vm; 327 + bo_va->bo = bo; 328 + bo_va->soffset = 0; 329 + bo_va->eoffset = 0; 330 + bo_va->flags = 0; 331 + bo_va->valid = false; 332 + bo_va->ref_count = 1; 333 + INIT_LIST_HEAD(&bo_va->bo_list); 334 + INIT_LIST_HEAD(&bo_va->vm_list); 335 + 336 + mutex_lock(&vm->mutex); 337 + list_add(&bo_va->vm_list, &vm->va); 338 + list_add_tail(&bo_va->bo_list, &bo->va); 339 + mutex_unlock(&vm->mutex); 340 + 341 + return bo_va; 342 + } 343 + 344 + /** 345 + * radeon_vm_clear_bo - initially clear the page dir/table 346 + * 347 + * @rdev: radeon_device pointer 348 + * @bo: bo to clear 349 + */ 350 + static int radeon_vm_clear_bo(struct radeon_device *rdev, 351 + struct radeon_bo *bo) 352 + { 353 + struct ttm_validate_buffer tv; 354 + struct ww_acquire_ctx ticket; 355 + struct list_head head; 356 + struct radeon_ib ib; 357 + unsigned entries; 358 + uint64_t addr; 359 + int r; 360 + 361 + memset(&tv, 0, sizeof(tv)); 362 + tv.bo = &bo->tbo; 363 + 364 + INIT_LIST_HEAD(&head); 365 + 
list_add(&tv.head, &head); 366 + 367 + r = ttm_eu_reserve_buffers(&ticket, &head); 368 + if (r) 369 + return r; 370 + 371 + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 372 + if (r) 373 + goto error; 374 + 375 + addr = radeon_bo_gpu_offset(bo); 376 + entries = radeon_bo_size(bo) / 8; 377 + 378 + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, 379 + NULL, entries * 2 + 64); 380 + if (r) 381 + goto error; 382 + 383 + ib.length_dw = 0; 384 + 385 + radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); 386 + 387 + r = radeon_ib_schedule(rdev, &ib, NULL); 388 + if (r) 389 + goto error; 390 + 391 + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); 392 + radeon_ib_free(rdev, &ib); 393 + 394 + return 0; 395 + 396 + error: 397 + ttm_eu_backoff_reservation(&ticket, &head); 398 + return r; 399 + } 400 + 401 + /** 402 + * radeon_vm_bo_set_addr - set bos virtual address inside a vm 403 + * 404 + * @rdev: radeon_device pointer 405 + * @bo_va: bo_va to store the address 406 + * @soffset: requested offset of the buffer in the VM address space 407 + * @flags: attributes of pages (read/write/valid/etc.) 408 + * 409 + * Set offset of @bo_va (cayman+). 410 + * Validate and set the offset requested within the vm address space. 411 + * Returns 0 for success, error for failure. 412 + * 413 + * Object has to be reserved! 
414 + */ 415 + int radeon_vm_bo_set_addr(struct radeon_device *rdev, 416 + struct radeon_bo_va *bo_va, 417 + uint64_t soffset, 418 + uint32_t flags) 419 + { 420 + uint64_t size = radeon_bo_size(bo_va->bo); 421 + uint64_t eoffset, last_offset = 0; 422 + struct radeon_vm *vm = bo_va->vm; 423 + struct radeon_bo_va *tmp; 424 + struct list_head *head; 425 + unsigned last_pfn, pt_idx; 426 + int r; 427 + 428 + if (soffset) { 429 + /* make sure object fit at this offset */ 430 + eoffset = soffset + size; 431 + if (soffset >= eoffset) { 432 + return -EINVAL; 433 + } 434 + 435 + last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; 436 + if (last_pfn > rdev->vm_manager.max_pfn) { 437 + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", 438 + last_pfn, rdev->vm_manager.max_pfn); 439 + return -EINVAL; 440 + } 441 + 442 + } else { 443 + eoffset = last_pfn = 0; 444 + } 445 + 446 + mutex_lock(&vm->mutex); 447 + head = &vm->va; 448 + last_offset = 0; 449 + list_for_each_entry(tmp, &vm->va, vm_list) { 450 + if (bo_va == tmp) { 451 + /* skip over currently modified bo */ 452 + continue; 453 + } 454 + 455 + if (soffset >= last_offset && eoffset <= tmp->soffset) { 456 + /* bo can be added before this one */ 457 + break; 458 + } 459 + if (eoffset > tmp->soffset && soffset < tmp->eoffset) { 460 + /* bo and tmp overlap, invalid offset */ 461 + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", 462 + bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, 463 + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); 464 + mutex_unlock(&vm->mutex); 465 + return -EINVAL; 466 + } 467 + last_offset = tmp->eoffset; 468 + head = &tmp->vm_list; 469 + } 470 + 471 + bo_va->soffset = soffset; 472 + bo_va->eoffset = eoffset; 473 + bo_va->flags = flags; 474 + bo_va->valid = false; 475 + list_move(&bo_va->vm_list, head); 476 + 477 + soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; 478 + eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; 479 + 480 + if (eoffset > 
vm->max_pde_used) 481 + vm->max_pde_used = eoffset; 482 + 483 + radeon_bo_unreserve(bo_va->bo); 484 + 485 + /* walk over the address space and allocate the page tables */ 486 + for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { 487 + struct radeon_bo *pt; 488 + 489 + if (vm->page_tables[pt_idx].bo) 490 + continue; 491 + 492 + /* drop mutex to allocate and clear page table */ 493 + mutex_unlock(&vm->mutex); 494 + 495 + r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, 496 + RADEON_GPU_PAGE_SIZE, false, 497 + RADEON_GEM_DOMAIN_VRAM, NULL, &pt); 498 + if (r) 499 + return r; 500 + 501 + r = radeon_vm_clear_bo(rdev, pt); 502 + if (r) { 503 + radeon_bo_unref(&pt); 504 + radeon_bo_reserve(bo_va->bo, false); 505 + return r; 506 + } 507 + 508 + /* aquire mutex again */ 509 + mutex_lock(&vm->mutex); 510 + if (vm->page_tables[pt_idx].bo) { 511 + /* someone else allocated the pt in the meantime */ 512 + mutex_unlock(&vm->mutex); 513 + radeon_bo_unref(&pt); 514 + mutex_lock(&vm->mutex); 515 + continue; 516 + } 517 + 518 + vm->page_tables[pt_idx].addr = 0; 519 + vm->page_tables[pt_idx].bo = pt; 520 + } 521 + 522 + mutex_unlock(&vm->mutex); 523 + return radeon_bo_reserve(bo_va->bo, false); 524 + } 525 + 526 + /** 527 + * radeon_vm_map_gart - get the physical address of a gart page 528 + * 529 + * @rdev: radeon_device pointer 530 + * @addr: the unmapped addr 531 + * 532 + * Look up the physical address of the page that the pte resolves 533 + * to (cayman+). 534 + * Returns the physical address of the page. 
535 + */ 536 + uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) 537 + { 538 + uint64_t result; 539 + 540 + /* page table offset */ 541 + result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; 542 + 543 + /* in case cpu page size != gpu page size*/ 544 + result |= addr & (~PAGE_MASK); 545 + 546 + return result; 547 + } 548 + 549 + /** 550 + * radeon_vm_page_flags - translate page flags to what the hw uses 551 + * 552 + * @flags: flags comming from userspace 553 + * 554 + * Translate the flags the userspace ABI uses to hw flags. 555 + */ 556 + static uint32_t radeon_vm_page_flags(uint32_t flags) 557 + { 558 + uint32_t hw_flags = 0; 559 + hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; 560 + hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; 561 + hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; 562 + if (flags & RADEON_VM_PAGE_SYSTEM) { 563 + hw_flags |= R600_PTE_SYSTEM; 564 + hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; 565 + } 566 + return hw_flags; 567 + } 568 + 569 + /** 570 + * radeon_vm_update_pdes - make sure that page directory is valid 571 + * 572 + * @rdev: radeon_device pointer 573 + * @vm: requested vm 574 + * @start: start of GPU address range 575 + * @end: end of GPU address range 576 + * 577 + * Allocates new page tables if necessary 578 + * and updates the page directory (cayman+). 579 + * Returns 0 for success, error for failure. 580 + * 581 + * Global and local mutex must be locked! 582 + */ 583 + int radeon_vm_update_page_directory(struct radeon_device *rdev, 584 + struct radeon_vm *vm) 585 + { 586 + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; 587 + 588 + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); 589 + uint64_t last_pde = ~0, last_pt = ~0; 590 + unsigned count = 0, pt_idx, ndw; 591 + struct radeon_ib ib; 592 + int r; 593 + 594 + /* padding, etc. 
*/ 595 + ndw = 64; 596 + 597 + /* assume the worst case */ 598 + ndw += vm->max_pde_used * 12; 599 + 600 + /* update too big for an IB */ 601 + if (ndw > 0xfffff) 602 + return -ENOMEM; 603 + 604 + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); 605 + if (r) 606 + return r; 607 + ib.length_dw = 0; 608 + 609 + /* walk over the address space and update the page directory */ 610 + for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { 611 + struct radeon_bo *bo = vm->page_tables[pt_idx].bo; 612 + uint64_t pde, pt; 613 + 614 + if (bo == NULL) 615 + continue; 616 + 617 + pt = radeon_bo_gpu_offset(bo); 618 + if (vm->page_tables[pt_idx].addr == pt) 619 + continue; 620 + vm->page_tables[pt_idx].addr = pt; 621 + 622 + pde = pd_addr + pt_idx * 8; 623 + if (((last_pde + 8 * count) != pde) || 624 + ((last_pt + incr * count) != pt)) { 625 + 626 + if (count) { 627 + radeon_asic_vm_set_page(rdev, &ib, last_pde, 628 + last_pt, count, incr, 629 + R600_PTE_VALID); 630 + } 631 + 632 + count = 1; 633 + last_pde = pde; 634 + last_pt = pt; 635 + } else { 636 + ++count; 637 + } 638 + } 639 + 640 + if (count) 641 + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, 642 + incr, R600_PTE_VALID); 643 + 644 + if (ib.length_dw != 0) { 645 + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); 646 + r = radeon_ib_schedule(rdev, &ib, NULL); 647 + if (r) { 648 + radeon_ib_free(rdev, &ib); 649 + return r; 650 + } 651 + radeon_fence_unref(&vm->fence); 652 + vm->fence = radeon_fence_ref(ib.fence); 653 + radeon_fence_unref(&vm->last_flush); 654 + } 655 + radeon_ib_free(rdev, &ib); 656 + 657 + return 0; 658 + } 659 + 660 + /** 661 + * radeon_vm_update_ptes - make sure that page tables are valid 662 + * 663 + * @rdev: radeon_device pointer 664 + * @vm: requested vm 665 + * @start: start of GPU address range 666 + * @end: end of GPU address range 667 + * @dst: destination address to map to 668 + * @flags: mapping flags 669 + * 670 + * Update the page tables in the 
range @start - @end (cayman+). 671 + * 672 + * Global and local mutex must be locked! 673 + */ 674 + static void radeon_vm_update_ptes(struct radeon_device *rdev, 675 + struct radeon_vm *vm, 676 + struct radeon_ib *ib, 677 + uint64_t start, uint64_t end, 678 + uint64_t dst, uint32_t flags) 679 + { 680 + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; 681 + 682 + uint64_t last_pte = ~0, last_dst = ~0; 683 + unsigned count = 0; 684 + uint64_t addr; 685 + 686 + start = start / RADEON_GPU_PAGE_SIZE; 687 + end = end / RADEON_GPU_PAGE_SIZE; 688 + 689 + /* walk over the address space and update the page tables */ 690 + for (addr = start; addr < end; ) { 691 + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; 692 + unsigned nptes; 693 + uint64_t pte; 694 + 695 + if ((addr & ~mask) == (end & ~mask)) 696 + nptes = end - addr; 697 + else 698 + nptes = RADEON_VM_PTE_COUNT - (addr & mask); 699 + 700 + pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); 701 + pte += (addr & mask) * 8; 702 + 703 + if ((last_pte + 8 * count) != pte) { 704 + 705 + if (count) { 706 + radeon_asic_vm_set_page(rdev, ib, last_pte, 707 + last_dst, count, 708 + RADEON_GPU_PAGE_SIZE, 709 + flags); 710 + } 711 + 712 + count = nptes; 713 + last_pte = pte; 714 + last_dst = dst; 715 + } else { 716 + count += nptes; 717 + } 718 + 719 + addr += nptes; 720 + dst += nptes * RADEON_GPU_PAGE_SIZE; 721 + } 722 + 723 + if (count) { 724 + radeon_asic_vm_set_page(rdev, ib, last_pte, 725 + last_dst, count, 726 + RADEON_GPU_PAGE_SIZE, flags); 727 + } 728 + } 729 + 730 + /** 731 + * radeon_vm_bo_update - map a bo into the vm page table 732 + * 733 + * @rdev: radeon_device pointer 734 + * @vm: requested vm 735 + * @bo: radeon buffer object 736 + * @mem: ttm mem 737 + * 738 + * Fill in the page table entries for @bo (cayman+). 739 + * Returns 0 for success, -EINVAL for failure. 740 + * 741 + * Object have to be reserved and mutex must be locked! 
742 + */ 743 + int radeon_vm_bo_update(struct radeon_device *rdev, 744 + struct radeon_vm *vm, 745 + struct radeon_bo *bo, 746 + struct ttm_mem_reg *mem) 747 + { 748 + struct radeon_ib ib; 749 + struct radeon_bo_va *bo_va; 750 + unsigned nptes, ndw; 751 + uint64_t addr; 752 + int r; 753 + 754 + bo_va = radeon_vm_bo_find(vm, bo); 755 + if (bo_va == NULL) { 756 + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); 757 + return -EINVAL; 758 + } 759 + 760 + if (!bo_va->soffset) { 761 + dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", 762 + bo, vm); 763 + return -EINVAL; 764 + } 765 + 766 + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) 767 + return 0; 768 + 769 + bo_va->flags &= ~RADEON_VM_PAGE_VALID; 770 + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; 771 + if (mem) { 772 + addr = mem->start << PAGE_SHIFT; 773 + if (mem->mem_type != TTM_PL_SYSTEM) { 774 + bo_va->flags |= RADEON_VM_PAGE_VALID; 775 + bo_va->valid = true; 776 + } 777 + if (mem->mem_type == TTM_PL_TT) { 778 + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; 779 + } else { 780 + addr += rdev->vm_manager.vram_base_offset; 781 + } 782 + } else { 783 + addr = 0; 784 + bo_va->valid = false; 785 + } 786 + 787 + trace_radeon_vm_bo_update(bo_va); 788 + 789 + nptes = radeon_bo_ngpu_pages(bo); 790 + 791 + /* padding, etc. 
*/ 792 + ndw = 64; 793 + 794 + if (RADEON_VM_BLOCK_SIZE > 11) 795 + /* reserve space for one header for every 2k dwords */ 796 + ndw += (nptes >> 11) * 4; 797 + else 798 + /* reserve space for one header for 799 + every (1 << BLOCK_SIZE) entries */ 800 + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; 801 + 802 + /* reserve space for pte addresses */ 803 + ndw += nptes * 2; 804 + 805 + /* update too big for an IB */ 806 + if (ndw > 0xfffff) 807 + return -ENOMEM; 808 + 809 + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); 810 + if (r) 811 + return r; 812 + ib.length_dw = 0; 813 + 814 + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, 815 + addr, radeon_vm_page_flags(bo_va->flags)); 816 + 817 + radeon_semaphore_sync_to(ib.semaphore, vm->fence); 818 + r = radeon_ib_schedule(rdev, &ib, NULL); 819 + if (r) { 820 + radeon_ib_free(rdev, &ib); 821 + return r; 822 + } 823 + radeon_fence_unref(&vm->fence); 824 + vm->fence = radeon_fence_ref(ib.fence); 825 + radeon_ib_free(rdev, &ib); 826 + radeon_fence_unref(&vm->last_flush); 827 + 828 + return 0; 829 + } 830 + 831 + /** 832 + * radeon_vm_bo_rmv - remove a bo to a specific vm 833 + * 834 + * @rdev: radeon_device pointer 835 + * @bo_va: requested bo_va 836 + * 837 + * Remove @bo_va->bo from the requested vm (cayman+). 838 + * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and 839 + * remove the ptes for @bo_va in the page table. 840 + * Returns 0 for success. 841 + * 842 + * Object have to be reserved! 
843 + */ 844 + int radeon_vm_bo_rmv(struct radeon_device *rdev, 845 + struct radeon_bo_va *bo_va) 846 + { 847 + int r = 0; 848 + 849 + mutex_lock(&bo_va->vm->mutex); 850 + if (bo_va->soffset) 851 + r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); 852 + 853 + list_del(&bo_va->vm_list); 854 + mutex_unlock(&bo_va->vm->mutex); 855 + list_del(&bo_va->bo_list); 856 + 857 + kfree(bo_va); 858 + return r; 859 + } 860 + 861 + /** 862 + * radeon_vm_bo_invalidate - mark the bo as invalid 863 + * 864 + * @rdev: radeon_device pointer 865 + * @vm: requested vm 866 + * @bo: radeon buffer object 867 + * 868 + * Mark @bo as invalid (cayman+). 869 + */ 870 + void radeon_vm_bo_invalidate(struct radeon_device *rdev, 871 + struct radeon_bo *bo) 872 + { 873 + struct radeon_bo_va *bo_va; 874 + 875 + list_for_each_entry(bo_va, &bo->va, bo_list) { 876 + bo_va->valid = false; 877 + } 878 + } 879 + 880 + /** 881 + * radeon_vm_init - initialize a vm instance 882 + * 883 + * @rdev: radeon_device pointer 884 + * @vm: requested vm 885 + * 886 + * Init @vm fields (cayman+). 
887 + */ 888 + int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) 889 + { 890 + unsigned pd_size, pd_entries, pts_size; 891 + int r; 892 + 893 + vm->id = 0; 894 + vm->fence = NULL; 895 + vm->last_flush = NULL; 896 + vm->last_id_use = NULL; 897 + mutex_init(&vm->mutex); 898 + INIT_LIST_HEAD(&vm->va); 899 + 900 + pd_size = radeon_vm_directory_size(rdev); 901 + pd_entries = radeon_vm_num_pdes(rdev); 902 + 903 + /* allocate page table array */ 904 + pts_size = pd_entries * sizeof(struct radeon_vm_pt); 905 + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); 906 + if (vm->page_tables == NULL) { 907 + DRM_ERROR("Cannot allocate memory for page table array\n"); 908 + return -ENOMEM; 909 + } 910 + 911 + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, 912 + RADEON_GEM_DOMAIN_VRAM, NULL, 913 + &vm->page_directory); 914 + if (r) 915 + return r; 916 + 917 + r = radeon_vm_clear_bo(rdev, vm->page_directory); 918 + if (r) { 919 + radeon_bo_unref(&vm->page_directory); 920 + vm->page_directory = NULL; 921 + return r; 922 + } 923 + 924 + return 0; 925 + } 926 + 927 + /** 928 + * radeon_vm_fini - tear down a vm instance 929 + * 930 + * @rdev: radeon_device pointer 931 + * @vm: requested vm 932 + * 933 + * Tear down @vm (cayman+). 
934 + * Unbind the VM and remove all bos from the vm bo list 935 + */ 936 + void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) 937 + { 938 + struct radeon_bo_va *bo_va, *tmp; 939 + int i, r; 940 + 941 + if (!list_empty(&vm->va)) { 942 + dev_err(rdev->dev, "still active bo inside vm\n"); 943 + } 944 + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { 945 + list_del_init(&bo_va->vm_list); 946 + r = radeon_bo_reserve(bo_va->bo, false); 947 + if (!r) { 948 + list_del_init(&bo_va->bo_list); 949 + radeon_bo_unreserve(bo_va->bo); 950 + kfree(bo_va); 951 + } 952 + } 953 + 954 + 955 + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) 956 + radeon_bo_unref(&vm->page_tables[i].bo); 957 + kfree(vm->page_tables); 958 + 959 + radeon_bo_unref(&vm->page_directory); 960 + 961 + radeon_fence_unref(&vm->fence); 962 + radeon_fence_unref(&vm->last_flush); 963 + radeon_fence_unref(&vm->last_id_use); 964 + 965 + mutex_destroy(&vm->mutex); 966 + }
+15
include/uapi/drm/radeon_drm.h
··· 510 510 #define DRM_RADEON_GEM_GET_TILING 0x29 511 511 #define DRM_RADEON_GEM_BUSY 0x2a 512 512 #define DRM_RADEON_GEM_VA 0x2b 513 + #define DRM_RADEON_GEM_OP 0x2c 513 514 514 515 #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) 515 516 #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) ··· 553 552 #define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) 554 553 #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) 555 554 #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) 555 + #define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) 556 556 557 557 typedef struct drm_radeon_init { 558 558 enum { ··· 886 884 uint64_t data_ptr; 887 885 }; 888 886 887 + /* Sets or returns a value associated with a buffer. */ 888 + struct drm_radeon_gem_op { 889 + uint32_t handle; /* buffer */ 890 + uint32_t op; /* RADEON_GEM_OP_* */ 891 + uint64_t value; /* input or return value */ 892 + }; 893 + 894 + #define RADEON_GEM_OP_GET_INITIAL_DOMAIN 0 895 + #define RADEON_GEM_OP_SET_INITIAL_DOMAIN 1 896 + 889 897 #define RADEON_VA_MAP 1 890 898 #define RADEON_VA_UNMAP 2 891 899 ··· 1004 992 #define RADEON_INFO_VCE_FW_VERSION 0x1b 1005 993 /* version of VCE feedback */ 1006 994 #define RADEON_INFO_VCE_FB_VERSION 0x1c 995 + #define RADEON_INFO_NUM_BYTES_MOVED 0x1d 996 + #define RADEON_INFO_VRAM_USAGE 0x1e 997 + #define RADEON_INFO_GTT_USAGE 0x1f 1007 998 1008 999 1009 1000 struct drm_radeon_info {