drm/i915: Make a single set-to-cpu-domain path and use it wherever needed.

This fixes several domain management bugs, including potential lack of cache
invalidation for pread, potential failure to wait for set_domain(CPU, 0),
and more, along with producing more intelligible code.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>

Authored by Eric Anholt, committed by Dave Airlie
Commit e47c68e9, parent 2ef7eeaa

2 files changed, 221 insertions(+), 158 deletions(-)
drivers/gpu/drm/i915/i915_drv.h (+2 -2)

···
         uint32_t agp_type;

         /**
-         * Flagging of which individual pages are valid in GEM_DOMAIN_CPU when
-         * GEM_DOMAIN_CPU is not in the object's read domain.
+         * If present, while GEM_DOMAIN_CPU is in the read domain this array
+         * flags which individual pages are valid.
          */
         uint8_t *page_cpu_valid;
 };
drivers/gpu/drm/i915/i915_gem.c (+219 -156)
···
 i915_gem_object_set_domain(struct drm_gem_object *obj,
                            uint32_t read_domains,
                            uint32_t write_domain);
-static int
-i915_gem_object_set_domain_range(struct drm_gem_object *obj,
-                                 uint64_t offset,
-                                 uint64_t size,
-                                 uint32_t read_domains,
-                                 uint32_t write_domain);
-static int
-i915_gem_set_domain(struct drm_gem_object *obj,
-                    struct drm_file *file_priv,
-                    uint32_t read_domains,
-                    uint32_t write_domain);
+static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
+static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
 static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
                                              int write);
+static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
+                                             int write);
+static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
+                                                      uint64_t offset,
+                                                      uint64_t size);
+static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
···

         mutex_lock(&dev->struct_mutex);

-        ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
-                                               I915_GEM_DOMAIN_CPU, 0);
+        ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
+                                                        args->size);
         if (ret != 0) {
                 drm_gem_object_unreference(obj);
                 mutex_unlock(&dev->struct_mutex);
···

         mutex_lock(&dev->struct_mutex);

-        ret = i915_gem_set_domain(obj, file_priv,
-                                  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
         if (ret) {
                 mutex_unlock(&dev->struct_mutex);
                 return ret;
···
         if (read_domains & I915_GEM_DOMAIN_GTT) {
                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
         } else {
-                ret = i915_gem_set_domain(obj, file_priv,
-                                          read_domains, write_domain);
+                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
         }

         drm_gem_object_unreference(obj);
···
         obj_priv = obj->driver_private;

         /* Pinned buffers may be scanout, so flush the cache */
-        if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
-                i915_gem_clflush_object(obj);
-                drm_agp_chipset_flush(dev);
-        }
+        if (obj_priv->pin_count)
+                i915_gem_object_flush_cpu_write_domain(obj);
+
         drm_gem_object_unreference(obj);
         mutex_unlock(&dev->struct_mutex);
         return ret;
···
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
         int ret;

-        /* If there are writes queued to the buffer, flush and
-         * create a new seqno to wait for.
+        /* This function only exists to support waiting for existing rendering,
+         * not for emitting required flushes.
          */
-        if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
-                uint32_t seqno, write_domain = obj->write_domain;
-#if WATCH_BUF
-                DRM_INFO("%s: flushing object %p from write domain %08x\n",
-                         __func__, obj, write_domain);
-#endif
-                i915_gem_flush(dev, 0, write_domain);
-
-                seqno = i915_add_request(dev, write_domain);
-                i915_gem_object_move_to_active(obj, seqno);
-#if WATCH_LRU
-                DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
-#endif
-        }
+        BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);

         /* If there is rendering queued on the buffer being evicted, wait for
          * it.
···
                 return -EINVAL;
         }

-        /* Wait for any rendering to complete
-         */
-        ret = i915_gem_object_wait_rendering(obj);
-        if (ret) {
-                DRM_ERROR("wait_rendering failed: %d\n", ret);
-                return ret;
-        }
-
         /* Move the object to the CPU domain to ensure that
          * any possible CPU writes while it's not in the GTT
          * are flushed when we go to remap it. This will
          * also ensure that all pending GPU writes are finished
          * before we unbind.
          */
-        ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
-                                         I915_GEM_DOMAIN_CPU);
+        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
         if (ret) {
-                DRM_ERROR("set_domain failed: %d\n", ret);
+                if (ret != -ERESTARTSYS)
+                        DRM_ERROR("set_domain failed: %d\n", ret);
                 return ret;
         }
···
         drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
 }

+/** Flushes any GPU write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
+{
+        struct drm_device *dev = obj->dev;
+        uint32_t seqno;
+
+        if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
+                return;
+
+        /* Queue the GPU write cache flushing we need. */
+        i915_gem_flush(dev, 0, obj->write_domain);
+        seqno = i915_add_request(dev, obj->write_domain);
+        obj->write_domain = 0;
+        i915_gem_object_move_to_active(obj, seqno);
+}
+
+/** Flushes the GTT write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
+{
+        if (obj->write_domain != I915_GEM_DOMAIN_GTT)
+                return;
+
+        /* No actual flushing is required for the GTT write domain. Writes
+         * to it immediately go to main memory as far as we know, so there's
+         * no chipset flush. It also doesn't land in render cache.
+         */
+        obj->write_domain = 0;
+}
+
+/** Flushes the CPU write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
+{
+        struct drm_device *dev = obj->dev;
+
+        if (obj->write_domain != I915_GEM_DOMAIN_CPU)
+                return;
+
+        i915_gem_clflush_object(obj);
+        drm_agp_chipset_flush(dev);
+        obj->write_domain = 0;
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  *
···
 static int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
-        struct drm_device *dev = obj->dev;
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
-        uint32_t flush_domains;
+        int ret;

-        /* Figure out what GPU domains we need to flush or invalidate for
-         * moving to GTT.
-         */
-        flush_domains = obj->write_domain & I915_GEM_GPU_DOMAINS;
-
-        /* Queue the GPU write cache flushing we need. */
-        if (flush_domains != 0) {
-                uint32_t seqno;
-
-                obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-                i915_gem_flush(dev, 0, flush_domains);
-                seqno = i915_add_request(dev, flush_domains);
-                i915_gem_object_move_to_active(obj, seqno);
-        }
-
+        i915_gem_object_flush_gpu_write_domain(obj);
         /* Wait on any GPU rendering and flushing to occur. */
-        if (obj_priv->active) {
-                int ret;
-
-                ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
-                if (ret != 0)
-                        return ret;
-        }
+        ret = i915_gem_object_wait_rendering(obj);
+        if (ret != 0)
+                return ret;

         /* If we're writing through the GTT domain, then CPU and GPU caches
          * will need to be invalidated at next use.
          */
         if (write)
-                obj->read_domains &= ~(I915_GEM_GPU_DOMAINS |
-                                       I915_GEM_DOMAIN_CPU);
+                obj->read_domains &= I915_GEM_DOMAIN_GTT;

-        /* Flush the CPU domain if it's dirty. */
-        if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
-                i915_gem_clflush_object(obj);
-                drm_agp_chipset_flush(dev);
-
-                obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
-        }
+        i915_gem_object_flush_cpu_write_domain(obj);

         /* It should now be out of any other write domains, and we can update
          * the domain values for our changes.
          */
         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
         obj->read_domains |= I915_GEM_DOMAIN_GTT;
-        if (write)
+        if (write) {
                 obj->write_domain = I915_GEM_DOMAIN_GTT;
+                obj_priv->dirty = 1;
+        }
+
+        return 0;
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+static int
+i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
+{
+        struct drm_device *dev = obj->dev;
+        int ret;
+
+        i915_gem_object_flush_gpu_write_domain(obj);
+        /* Wait on any GPU rendering and flushing to occur. */
+        ret = i915_gem_object_wait_rendering(obj);
+        if (ret != 0)
+                return ret;
+
+        i915_gem_object_flush_gtt_write_domain(obj);
+
+        /* If we have a partially-valid cache of the object in the CPU,
+         * finish invalidating it and free the per-page flags.
+         */
+        i915_gem_object_set_to_full_cpu_read_domain(obj);
+
+        /* Flush the CPU cache if it's still invalid. */
+        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+                i915_gem_clflush_object(obj);
+                drm_agp_chipset_flush(dev);
+
+                obj->read_domains |= I915_GEM_DOMAIN_CPU;
+        }
+
+        /* It should now be out of any other write domains, and we can update
+         * the domain values for our changes.
+         */
+        BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+
+        /* If we're writing through the CPU, then the GPU read domains will
+         * need to be invalidated at next use.
+         */
+        if (write) {
+                obj->read_domains &= I915_GEM_DOMAIN_CPU;
+                obj->write_domain = I915_GEM_DOMAIN_CPU;
+        }

         return 0;
 }
···
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
         uint32_t invalidate_domains = 0;
         uint32_t flush_domains = 0;
-        int ret;
+
+        BUG_ON(read_domains & I915_GEM_DOMAIN_CPU);
+        BUG_ON(write_domain == I915_GEM_DOMAIN_CPU);

 #if WATCH_BUF
         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
···
                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
                          __func__, flush_domains, invalidate_domains);
 #endif
-                /*
-                 * If we're invaliding the CPU cache and flushing a GPU cache,
-                 * then pause for rendering so that the GPU caches will be
-                 * flushed before the cpu cache is invalidated
-                 */
-                if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
-                    (flush_domains & ~(I915_GEM_DOMAIN_CPU |
-                                       I915_GEM_DOMAIN_GTT))) {
-                        ret = i915_gem_object_wait_rendering(obj);
-                        if (ret)
-                                return ret;
-                }
                 i915_gem_clflush_object(obj);
         }

         if ((write_domain | flush_domains) != 0)
                 obj->write_domain = write_domain;
-
-        /* If we're invalidating the CPU domain, clear the per-page CPU
-         * domain list as well.
-         */
-        if (obj_priv->page_cpu_valid != NULL &&
-            (write_domain != 0 ||
-             read_domains & I915_GEM_DOMAIN_CPU)) {
-                drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
-                         DRM_MEM_DRIVER);
-                obj_priv->page_cpu_valid = NULL;
-        }
         obj->read_domains = read_domains;

         dev->invalidate_domains |= invalidate_domains;
···
 }

 /**
- * Set the read/write domain on a range of the object.
+ * Moves the object from a partially CPU read to a full one.
  *
- * Currently only implemented for CPU reads, otherwise drops to normal
- * i915_gem_object_set_domain().
+ * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
+ * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
+ */
+static void
+i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
+{
+        struct drm_device *dev = obj->dev;
+        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+        if (!obj_priv->page_cpu_valid)
+                return;
+
+        /* If we're partially in the CPU read domain, finish moving it in.
+         */
+        if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
+                int i;
+
+                for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
+                        if (obj_priv->page_cpu_valid[i])
+                                continue;
+                        drm_clflush_pages(obj_priv->page_list + i, 1);
+                }
+                drm_agp_chipset_flush(dev);
+        }
+
+        /* Free the page_cpu_valid mappings which are now stale, whether
+         * or not we've got I915_GEM_DOMAIN_CPU.
+         */
+        drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
+                 DRM_MEM_DRIVER);
+        obj_priv->page_cpu_valid = NULL;
+}
+
+/**
+ * Set the CPU read domain on a range of the object.
+ *
+ * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
+ * not entirely valid. The page_cpu_valid member of the object flags which
+ * pages have been flushed, and will be respected by
+ * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
+ * of the whole object.
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
  */
 static int
-i915_gem_object_set_domain_range(struct drm_gem_object *obj,
-                                 uint64_t offset,
-                                 uint64_t size,
-                                 uint32_t read_domains,
-                                 uint32_t write_domain)
+i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
+                                          uint64_t offset, uint64_t size)
 {
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
-        int ret, i;
+        int i, ret;

-        if (obj->read_domains & I915_GEM_DOMAIN_CPU)
+        if (offset == 0 && size == obj->size)
+                return i915_gem_object_set_to_cpu_domain(obj, 0);
+
+        i915_gem_object_flush_gpu_write_domain(obj);
+        /* Wait on any GPU rendering and flushing to occur. */
+        ret = i915_gem_object_wait_rendering(obj);
+        if (ret != 0)
+                return ret;
+        i915_gem_object_flush_gtt_write_domain(obj);
+
+        /* If we're already fully in the CPU read domain, we're done. */
+        if (obj_priv->page_cpu_valid == NULL &&
+            (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
                 return 0;

-        if (read_domains != I915_GEM_DOMAIN_CPU ||
-            write_domain != 0)
-                return i915_gem_object_set_domain(obj,
-                                                  read_domains, write_domain);
-
-        /* Wait on any GPU rendering to the object to be flushed. */
-        ret = i915_gem_object_wait_rendering(obj);
-        if (ret)
-                return ret;
-
+        /* Otherwise, create/clear the per-page CPU read domain flag if we're
+         * newly adding I915_GEM_DOMAIN_CPU
+         */
         if (obj_priv->page_cpu_valid == NULL) {
                 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
                                                       DRM_MEM_DRIVER);
-        }
+                if (obj_priv->page_cpu_valid == NULL)
+                        return -ENOMEM;
+        } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
+                memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);

         /* Flush the cache on any pages that are still invalid from the CPU's
          * perspective.
          */
-        for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
+        for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
+             i++) {
                 if (obj_priv->page_cpu_valid[i])
                         continue;

···

                 obj_priv->page_cpu_valid[i] = 1;
         }
+
+        /* It should now be out of any other write domains, and we can update
+         * the domain values for our changes.
+         */
+        BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+
+        obj->read_domains |= I915_GEM_DOMAIN_CPU;

         return 0;
 }
···
                                   (int) reloc.offset);
                         drm_gem_object_unreference(target_obj);
                         i915_gem_object_unpin(obj);
+                        return -EINVAL;
+                }
+
+                if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
+                    reloc.read_domains & I915_GEM_DOMAIN_CPU) {
+                        DRM_ERROR("reloc with read/write CPU domains: "
+                                  "obj %p target %d offset %d "
+                                  "read %08x write %08x",
+                                  obj, reloc.target_handle,
+                                  (int) reloc.offset,
+                                  reloc.read_domains,
+                                  reloc.write_domain);
                         return -EINVAL;
                 }

···
         /* XXX - flush the CPU caches for pinned objects
          * as the X server doesn't manage domains yet
          */
-        if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
-                i915_gem_clflush_object(obj);
-                drm_agp_chipset_flush(dev);
-                obj->write_domain = 0;
-        }
+        i915_gem_object_flush_cpu_write_domain(obj);
         args->offset = obj_priv->gtt_offset;
         drm_gem_object_unreference(obj);
         mutex_unlock(&dev->struct_mutex);
···

         drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
         drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
-}
-
-static int
-i915_gem_set_domain(struct drm_gem_object *obj,
-                    struct drm_file *file_priv,
-                    uint32_t read_domains,
-                    uint32_t write_domain)
-{
-        struct drm_device *dev = obj->dev;
-        int ret;
-        uint32_t flush_domains;
-
-        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-
-        ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
-        if (ret)
-                return ret;
-        flush_domains = i915_gem_dev_set_domain(obj->dev);
-
-        if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
-                (void) i915_add_request(dev, flush_domains);
-
-        return 0;
 }

 /** Unbinds all objects that are on the given buffer list. */
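
Illustrative sketch (not part of the patch): the consolidated set-to-CPU path boils down to "retire whichever write domain is dirty, wait for rendering, invalidate the CPU cache if needed, and only then update the domain flags". The toy C program below models that ordering with made-up names (struct obj, set_to_cpu_domain, DOMAIN_* flags); it is not the kernel API, just the bookkeeping pattern the new helpers enforce.

/* Toy model of the patch's domain bookkeeping -- illustrative names only,
 * not kernel code.  At most one write domain is dirty at a time; moving an
 * object to the CPU domain must flush that write domain first.
 */
#include <stdint.h>
#include <stdio.h>

#define DOMAIN_CPU    (1u << 0)
#define DOMAIN_GTT    (1u << 1)
#define DOMAIN_RENDER (1u << 2)        /* stands in for the GPU domains */

struct obj {
        uint32_t read_domains;
        uint32_t write_domain;
};

static void flush_gpu_write_domain(struct obj *o)
{
        if ((o->write_domain & DOMAIN_RENDER) == 0)
                return;
        /* kernel: i915_gem_flush() + i915_add_request() + move_to_active() */
        o->write_domain = 0;
}

static void flush_gtt_write_domain(struct obj *o)
{
        /* GTT writes already hit memory; only the flag needs clearing. */
        if (o->write_domain == DOMAIN_GTT)
                o->write_domain = 0;
}

static void set_to_cpu_domain(struct obj *o, int write)
{
        flush_gpu_write_domain(o);
        /* kernel: wait for any outstanding rendering here */
        flush_gtt_write_domain(o);

        if ((o->read_domains & DOMAIN_CPU) == 0) {
                /* kernel: clflush the object's pages + chipset flush */
                o->read_domains |= DOMAIN_CPU;
        }

        if (write) {
                /* CPU writes make every other cached copy stale. */
                o->read_domains = DOMAIN_CPU;
                o->write_domain = DOMAIN_CPU;
        }
}

int main(void)
{
        struct obj o = { DOMAIN_RENDER, DOMAIN_RENDER };

        set_to_cpu_domain(&o, 1);
        printf("read %#x write %#x\n",
               (unsigned) o.read_domains, (unsigned) o.write_domain);
        return 0;
}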