Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Use per object locking in execbuf, v12.

Now that we changed execbuf submission slightly to allow us to do all
pinning in one place, we can simply add ww versions on top of
struct_mutex. All we have to do is a separate path for -EDEADLK
handling, which needs to unpin all gem bo's before dropping the lock,
then starting over.

This finally allows us to do parallel submission, but because not
all of the pinning code uses the ww ctx yet, we cannot completely
drop struct_mutex yet.

Changes since v1:
- Keep struct_mutex for now. :(
Changes since v2:
- Make sure we always lock the ww context in slowpath.
Changes since v3:
- Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be
done on normal unlock path.
- Unconditionally release vmas and context.
Changes since v4:
- Rebased on top of struct_mutex reduction.
Changes since v5:
- Remove training wheels.
Changes since v6:
- Fix accidentally broken -ENOSPC handling.
Changes since v7:
- Handle gt buffer pool better.
Changes since v8:
- Properly clear variables, to make -EDEADLK handling not BUG.
Changes since v9:
- Fix unpinning fence on pnv and below.
Changes since v10:
- Make relocation gpu chaining work again.
Changes since v11:
- Remove relocation chaining; it was too painful to make it work.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200819140904.1708856-9-maarten.lankhorst@linux.intel.com
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

authored by

Maarten Lankhorst and committed by
Joonas Lahtinen
c43ce123 8e4ba491

+262 -170
+217 -146
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 257 257 /** list of vma that have execobj.relocation_count */ 258 258 struct list_head relocs; 259 259 260 + struct i915_gem_ww_ctx ww; 261 + 260 262 /** 261 263 * Track the most recently used object for relocations, as we 262 264 * frequently have to perform multiple relocations within the same ··· 277 275 struct i915_request *rq; 278 276 u32 *rq_cmd; 279 277 unsigned int rq_size; 278 + struct intel_gt_buffer_pool_node *pool; 280 279 } reloc_cache; 280 + 281 + struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ 281 282 282 283 u64 invalid_flags; /** Set of execobj.flags that are invalid */ 283 284 u32 context_flags; /** Set of execobj.flags to insert from the ctx */ ··· 288 283 u32 batch_start_offset; /** Location within object of batch */ 289 284 u32 batch_len; /** Length of batch within object */ 290 285 u32 batch_flags; /** Flags composed for emit_bb_start() */ 286 + struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ 291 287 292 288 /** 293 289 * Indicate either the size of the hastable used to resolve ··· 458 452 return !eb_vma_misplaced(entry, vma, ev->flags); 459 453 } 460 454 461 - static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) 462 - { 463 - GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); 464 - 465 - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) 466 - __i915_vma_unpin_fence(vma); 467 - 468 - __i915_vma_unpin(vma); 469 - } 470 - 471 455 static inline void 472 456 eb_unreserve_vma(struct eb_vma *ev) 473 457 { 474 458 if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) 475 459 return; 476 460 477 - __eb_unreserve_vma(ev->vma, ev->flags); 461 + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) 462 + __i915_vma_unpin_fence(ev->vma); 463 + 464 + __i915_vma_unpin(ev->vma); 478 465 ev->flags &= ~__EXEC_OBJECT_RESERVED; 479 466 } 480 467 ··· 562 563 563 564 eb->batch = ev; 564 565 } 565 - 566 - if (eb_pin_vma(eb, entry, ev)) { 567 - if (entry->offset != vma->node.start) { 
568 - entry->offset = vma->node.start | UPDATE; 569 - eb->args->flags |= __EXEC_HAS_RELOC; 570 - } 571 - } else { 572 - eb_unreserve_vma(ev); 573 - list_add_tail(&ev->bind_link, &eb->unbound); 574 - } 575 566 } 576 567 577 568 static inline int use_cpu_reloc(const struct reloc_cache *cache, ··· 646 657 * This avoid unnecessary unbinding of later objects in order to make 647 658 * room for the earlier objects *unless* we need to defragment. 648 659 */ 649 - 650 - if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) 651 - return -EINTR; 652 - 653 660 pass = 0; 654 661 do { 655 662 list_for_each_entry(ev, &eb->unbound, bind_link) { ··· 654 669 break; 655 670 } 656 671 if (err != -ENOSPC) 657 - break; 672 + return err; 658 673 659 674 /* Resort *all* the objects into priority order */ 660 675 INIT_LIST_HEAD(&eb->unbound); ··· 694 709 err = i915_gem_evict_vm(eb->context->vm); 695 710 mutex_unlock(&eb->context->vm->mutex); 696 711 if (err) 697 - goto unlock; 712 + return err; 698 713 break; 699 714 700 715 default: 701 - err = -ENOSPC; 702 - goto unlock; 716 + return -ENOSPC; 703 717 } 704 718 705 719 pin_flags = PIN_USER; 706 720 } while (1); 707 - 708 - unlock: 709 - mutex_unlock(&eb->i915->drm.struct_mutex); 710 - return err; 711 721 } 712 722 713 723 static unsigned int eb_batch_index(const struct i915_execbuffer *eb) ··· 831 851 int err = 0; 832 852 833 853 INIT_LIST_HEAD(&eb->relocs); 834 - INIT_LIST_HEAD(&eb->unbound); 835 854 836 855 for (i = 0; i < eb->buffer_count; i++) { 837 856 struct i915_vma *vma; ··· 873 894 return err; 874 895 } 875 896 897 + static int eb_validate_vmas(struct i915_execbuffer *eb) 898 + { 899 + unsigned int i; 900 + int err; 901 + 902 + INIT_LIST_HEAD(&eb->unbound); 903 + 904 + for (i = 0; i < eb->buffer_count; i++) { 905 + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 906 + struct eb_vma *ev = &eb->vma[i]; 907 + struct i915_vma *vma = ev->vma; 908 + 909 + err = i915_gem_object_lock(vma->obj, &eb->ww); 910 + if (err) 911 + 
return err; 912 + 913 + if (eb_pin_vma(eb, entry, ev)) { 914 + if (entry->offset != vma->node.start) { 915 + entry->offset = vma->node.start | UPDATE; 916 + eb->args->flags |= __EXEC_HAS_RELOC; 917 + } 918 + } else { 919 + eb_unreserve_vma(ev); 920 + 921 + list_add_tail(&ev->bind_link, &eb->unbound); 922 + if (drm_mm_node_allocated(&vma->node)) { 923 + err = i915_vma_unbind(vma); 924 + if (err) 925 + return err; 926 + } 927 + } 928 + 929 + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && 930 + eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); 931 + } 932 + 933 + if (!list_empty(&eb->unbound)) 934 + return eb_reserve(eb); 935 + 936 + return 0; 937 + } 938 + 876 939 static struct eb_vma * 877 940 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) 878 941 { ··· 935 914 } 936 915 } 937 916 938 - static void eb_release_vmas(const struct i915_execbuffer *eb) 917 + static void eb_release_vmas(const struct i915_execbuffer *eb, bool final) 939 918 { 940 919 const unsigned int count = eb->buffer_count; 941 920 unsigned int i; ··· 947 926 if (!vma) 948 927 break; 949 928 950 - eb->vma[i].vma = NULL; 929 + eb_unreserve_vma(ev); 951 930 952 - if (ev->flags & __EXEC_OBJECT_HAS_PIN) 953 - __eb_unreserve_vma(vma, ev->flags); 954 - 955 - i915_vma_put(vma); 931 + if (final) 932 + i915_vma_put(vma); 956 933 } 957 934 } 958 935 ··· 969 950 return gen8_canonical_addr((int)reloc->delta + target->node.start); 970 951 } 971 952 953 + static void reloc_cache_clear(struct reloc_cache *cache) 954 + { 955 + cache->rq = NULL; 956 + cache->rq_cmd = NULL; 957 + cache->pool = NULL; 958 + cache->rq_size = 0; 959 + } 960 + 972 961 static void reloc_cache_init(struct reloc_cache *cache, 973 962 struct drm_i915_private *i915) 974 963 { ··· 989 962 cache->has_fence = cache->gen < 4; 990 963 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; 991 964 cache->node.flags = 0; 992 - cache->rq = NULL; 993 - cache->rq_size = 0; 965 + reloc_cache_clear(cache); 994 966 } 995 967 
996 968 static inline void *unmask_page(unsigned long p) ··· 1011 985 return &i915->ggtt; 1012 986 } 1013 987 1014 - static void reloc_gpu_flush(struct reloc_cache *cache) 988 + static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) 989 + { 990 + if (!cache->pool) 991 + return; 992 + 993 + /* 994 + * This is a bit nasty, normally we keep objects locked until the end 995 + * of execbuffer, but we already submit this, and have to unlock before 996 + * dropping the reference. Fortunately we can only hold 1 pool node at 997 + * a time, so this should be harmless. 998 + */ 999 + i915_gem_ww_unlock_single(cache->pool->obj); 1000 + intel_gt_buffer_pool_put(cache->pool); 1001 + cache->pool = NULL; 1002 + } 1003 + 1004 + static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) 1015 1005 { 1016 1006 struct drm_i915_gem_object *obj = cache->rq->batch->obj; 1017 1007 ··· 1040 998 intel_gt_chipset_flush(cache->rq->engine->gt); 1041 999 1042 1000 i915_request_add(cache->rq); 1043 - cache->rq = NULL; 1001 + reloc_cache_put_pool(eb, cache); 1002 + reloc_cache_clear(cache); 1003 + 1004 + eb->reloc_pool = NULL; 1044 1005 } 1045 1006 1046 - static void reloc_cache_reset(struct reloc_cache *cache) 1007 + static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) 1047 1008 { 1048 1009 void *vaddr; 1049 1010 1050 1011 if (cache->rq) 1051 - reloc_gpu_flush(cache); 1012 + reloc_gpu_flush(eb, cache); 1052 1013 1053 1014 if (!cache->vaddr) 1054 1015 return; ··· 1065 1020 1066 1021 kunmap_atomic(vaddr); 1067 1022 i915_gem_object_finish_access(obj); 1068 - i915_gem_object_unlock(obj); 1069 1023 } else { 1070 1024 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1071 1025 ··· 1100 1056 unsigned int flushes; 1101 1057 int err; 1102 1058 1103 - err = i915_gem_object_lock_interruptible(obj, NULL); 1059 + err = i915_gem_object_prepare_write(obj, &flushes); 1104 1060 if (err) 1105 1061 return ERR_PTR(err); 1106 - 
1107 - err = i915_gem_object_prepare_write(obj, &flushes); 1108 - if (err) { 1109 - i915_gem_object_unlock(obj); 1110 - return ERR_PTR(err); 1111 - } 1112 1062 1113 1063 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 1114 1064 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); ··· 1145 1107 if (use_cpu_reloc(cache, obj)) 1146 1108 return NULL; 1147 1109 1148 - i915_gem_object_lock(obj, NULL); 1149 1110 err = i915_gem_object_set_to_gtt_domain(obj, true); 1150 - i915_gem_object_unlock(obj); 1151 1111 if (err) 1152 1112 return ERR_PTR(err); 1153 1113 ··· 1234 1198 struct drm_i915_gem_object *obj = vma->obj; 1235 1199 int err; 1236 1200 1237 - i915_vma_lock(vma); 1201 + assert_vma_held(vma); 1238 1202 1239 1203 if (obj->cache_dirty & ~obj->cache_coherent) 1240 1204 i915_gem_clflush_object(obj, 0); ··· 1243 1207 err = i915_request_await_object(rq, vma->obj, true); 1244 1208 if (err == 0) 1245 1209 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1246 - 1247 - i915_vma_unlock(vma); 1248 1210 1249 1211 return err; 1250 1212 } ··· 1253 1219 unsigned int len) 1254 1220 { 1255 1221 struct reloc_cache *cache = &eb->reloc_cache; 1256 - struct intel_gt_buffer_pool_node *pool; 1222 + struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; 1257 1223 struct i915_request *rq; 1258 1224 struct i915_vma *batch; 1259 1225 u32 *cmd; 1260 1226 int err; 1261 1227 1262 - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); 1263 - if (IS_ERR(pool)) 1264 - return PTR_ERR(pool); 1228 + if (!pool) { 1229 + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); 1230 + if (IS_ERR(pool)) 1231 + return PTR_ERR(pool); 1232 + } 1233 + eb->reloc_pool = NULL; 1234 + 1235 + err = i915_gem_object_lock(pool->obj, &eb->ww); 1236 + if (err) 1237 + goto err_pool; 1265 1238 1266 1239 cmd = i915_gem_object_pin_map(pool->obj, 1267 1240 cache->has_llc ? 
··· 1276 1235 I915_MAP_FORCE_WC); 1277 1236 if (IS_ERR(cmd)) { 1278 1237 err = PTR_ERR(cmd); 1279 - goto out_pool; 1238 + goto err_pool; 1280 1239 } 1281 1240 1282 1241 batch = i915_vma_instance(pool->obj, vma->vm, NULL); ··· 1325 1284 if (err) 1326 1285 goto skip_request; 1327 1286 1328 - i915_vma_lock(batch); 1287 + assert_vma_held(batch); 1329 1288 err = i915_request_await_object(rq, batch->obj, false); 1330 1289 if (err == 0) 1331 1290 err = i915_vma_move_to_active(batch, rq, 0); 1332 - i915_vma_unlock(batch); 1333 1291 if (err) 1334 1292 goto skip_request; 1335 1293 ··· 1338 1298 cache->rq = rq; 1339 1299 cache->rq_cmd = cmd; 1340 1300 cache->rq_size = 0; 1301 + cache->pool = pool; 1341 1302 1342 1303 /* Return with batch mapping (cmd) still pinned */ 1343 - goto out_pool; 1304 + return 0; 1344 1305 1345 1306 skip_request: 1346 1307 i915_request_set_error_once(rq, err); ··· 1351 1310 i915_vma_unpin(batch); 1352 1311 err_unmap: 1353 1312 i915_gem_object_unpin_map(pool->obj); 1354 - out_pool: 1355 - intel_gt_buffer_pool_put(pool); 1313 + err_pool: 1314 + eb->reloc_pool = pool; 1356 1315 return err; 1357 1316 } 1358 1317 ··· 1369 1328 u32 *cmd; 1370 1329 1371 1330 if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) 1372 - reloc_gpu_flush(cache); 1331 + reloc_gpu_flush(eb, cache); 1373 1332 1374 1333 if (unlikely(!cache->rq)) { 1375 1334 int err; ··· 1417 1376 return addr + offset_in_page(offset); 1418 1377 } 1419 1378 1420 - static bool __reloc_entry_gpu(struct i915_execbuffer *eb, 1379 + static int __reloc_entry_gpu(struct i915_execbuffer *eb, 1421 1380 struct i915_vma *vma, 1422 1381 u64 offset, 1423 1382 u64 target_addr) ··· 1435 1394 len = 3; 1436 1395 1437 1396 batch = reloc_gpu(eb, vma, len); 1438 - if (IS_ERR(batch)) 1397 + if (batch == ERR_PTR(-EDEADLK)) 1398 + return (s64)-EDEADLK; 1399 + else if (IS_ERR(batch)) 1439 1400 return false; 1440 1401 1441 1402 addr = gen8_canonical_addr(vma->node.start + offset); ··· 1490 1447 return true; 1491 1448 } 
1492 1449 1493 - static bool reloc_entry_gpu(struct i915_execbuffer *eb, 1450 + static int reloc_entry_gpu(struct i915_execbuffer *eb, 1494 1451 struct i915_vma *vma, 1495 1452 u64 offset, 1496 1453 u64 target_addr) ··· 1512 1469 { 1513 1470 u64 target_addr = relocation_target(reloc, target); 1514 1471 u64 offset = reloc->offset; 1472 + int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); 1515 1473 1516 - if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { 1474 + if (reloc_gpu < 0) 1475 + return reloc_gpu; 1476 + 1477 + if (!reloc_gpu) { 1517 1478 bool wide = eb->reloc_cache.use_64bit_reloc; 1518 1479 void *vaddr; 1519 1480 ··· 1720 1673 urelocs += ARRAY_SIZE(stack); 1721 1674 } while (remain); 1722 1675 out: 1723 - reloc_cache_reset(&eb->reloc_cache); 1676 + reloc_cache_reset(&eb->reloc_cache, eb); 1724 1677 return remain; 1725 1678 } 1726 1679 ··· 1743 1696 } 1744 1697 err = 0; 1745 1698 err: 1746 - reloc_cache_reset(&eb->reloc_cache); 1699 + reloc_cache_reset(&eb->reloc_cache, eb); 1747 1700 return err; 1748 1701 } 1749 1702 ··· 1883 1836 goto out; 1884 1837 } 1885 1838 1839 + /* We may process another execbuffer during the unlock... */ 1840 + eb_release_vmas(eb, false); 1841 + i915_gem_ww_ctx_fini(&eb->ww); 1842 + 1886 1843 /* 1887 1844 * We take 3 passes through the slowpatch. 
1888 1845 * ··· 1912 1861 1913 1862 flush_workqueue(eb->i915->mm.userptr_wq); 1914 1863 1864 + i915_gem_ww_ctx_init(&eb->ww, true); 1915 1865 if (err) 1916 1866 goto out; 1917 1867 1918 - err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); 1868 + /* reacquire the objects */ 1869 + repeat_validate: 1870 + err = eb_validate_vmas(eb); 1919 1871 if (err) 1920 - goto out; 1872 + goto err; 1873 + 1874 + GEM_BUG_ON(!eb->batch); 1921 1875 1922 1876 list_for_each_entry(ev, &eb->relocs, reloc_link) { 1923 1877 if (!have_copy) { ··· 1938 1882 } 1939 1883 } 1940 1884 1941 - mutex_unlock(&eb->i915->drm.struct_mutex); 1885 + if (err == -EDEADLK) 1886 + goto err; 1887 + 1942 1888 if (err && !have_copy) 1943 1889 goto repeat; 1944 1890 ··· 1960 1902 */ 1961 1903 1962 1904 err: 1905 + if (err == -EDEADLK) { 1906 + eb_release_vmas(eb, false); 1907 + err = i915_gem_ww_ctx_backoff(&eb->ww); 1908 + if (!err) 1909 + goto repeat_validate; 1910 + } 1911 + 1963 1912 if (err == -EAGAIN) 1964 1913 goto repeat; 1965 1914 ··· 1995 1930 { 1996 1931 int err; 1997 1932 1998 - err = eb_lookup_vmas(eb); 1999 - if (err) 2000 - return err; 2001 - 2002 - if (!list_empty(&eb->unbound)) { 2003 - err = eb_reserve(eb); 2004 - if (err) 2005 - return err; 2006 - } 1933 + retry: 1934 + err = eb_validate_vmas(eb); 1935 + if (err == -EAGAIN) 1936 + goto slow; 1937 + else if (err) 1938 + goto err; 2007 1939 2008 1940 /* The objects are in their final locations, apply the relocations. 
*/ 2009 1941 if (eb->args->flags & __EXEC_HAS_RELOC) { ··· 2012 1950 break; 2013 1951 } 2014 1952 2015 - if (err) 2016 - return eb_relocate_parse_slow(eb); 1953 + if (err == -EDEADLK) 1954 + goto err; 1955 + else if (err) 1956 + goto slow; 2017 1957 } 2018 1958 2019 - return eb_parse(eb); 1959 + if (!err) 1960 + err = eb_parse(eb); 1961 + 1962 + err: 1963 + if (err == -EDEADLK) { 1964 + eb_release_vmas(eb, false); 1965 + err = i915_gem_ww_ctx_backoff(&eb->ww); 1966 + if (!err) 1967 + goto retry; 1968 + } 1969 + 1970 + return err; 1971 + 1972 + slow: 1973 + err = eb_relocate_parse_slow(eb); 1974 + if (err) 1975 + /* 1976 + * If the user expects the execobject.offset and 1977 + * reloc.presumed_offset to be an exact match, 1978 + * as for using NO_RELOC, then we cannot update 1979 + * the execobject.offset until we have completed 1980 + * relocation. 1981 + */ 1982 + eb->args->flags &= ~__EXEC_HAS_RELOC; 1983 + 1984 + return err; 2020 1985 } 2021 1986 2022 1987 static int eb_move_to_gpu(struct i915_execbuffer *eb) 2023 1988 { 2024 1989 const unsigned int count = eb->buffer_count; 2025 - struct ww_acquire_ctx acquire; 2026 - unsigned int i; 1990 + unsigned int i = count; 2027 1991 int err = 0; 2028 - 2029 - ww_acquire_init(&acquire, &reservation_ww_class); 2030 - 2031 - for (i = 0; i < count; i++) { 2032 - struct eb_vma *ev = &eb->vma[i]; 2033 - struct i915_vma *vma = ev->vma; 2034 - 2035 - err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); 2036 - if (err == -EDEADLK) { 2037 - GEM_BUG_ON(i == 0); 2038 - do { 2039 - int j = i - 1; 2040 - 2041 - ww_mutex_unlock(&eb->vma[j].vma->resv->lock); 2042 - 2043 - swap(eb->vma[i], eb->vma[j]); 2044 - } while (--i); 2045 - 2046 - err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, 2047 - &acquire); 2048 - } 2049 - if (err) 2050 - break; 2051 - } 2052 - ww_acquire_done(&acquire); 2053 1992 2054 1993 while (i--) { 2055 1994 struct eb_vma *ev = &eb->vma[i]; ··· 2095 2032 2096 2033 if (err == 0) 2097 2034 err = 
i915_vma_move_to_active(vma, eb->request, flags); 2098 - 2099 - i915_vma_unlock(vma); 2100 2035 } 2101 - ww_acquire_fini(&acquire); 2102 2036 2103 2037 if (unlikely(err)) 2104 2038 goto err_skip; ··· 2287 2227 if (err) 2288 2228 goto err_commit; 2289 2229 2290 - err = dma_resv_lock_interruptible(pw->batch->resv, NULL); 2291 - if (err) 2292 - goto err_commit; 2293 - 2294 2230 err = dma_resv_reserve_shared(pw->batch->resv, 1); 2295 2231 if (err) 2296 - goto err_commit_unlock; 2232 + goto err_commit; 2297 2233 2298 2234 /* Wait for all writes (and relocs) into the batch to complete */ 2299 2235 err = i915_sw_fence_await_reservation(&pw->base.chain, 2300 2236 pw->batch->resv, NULL, false, 2301 2237 0, I915_FENCE_GFP); 2302 2238 if (err < 0) 2303 - goto err_commit_unlock; 2239 + goto err_commit; 2304 2240 2305 2241 /* Keep the batch alive and unwritten as we parse */ 2306 2242 dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); 2307 2243 2308 - dma_resv_unlock(pw->batch->resv); 2309 - 2310 2244 /* Force execution to wait for completion of the parser */ 2311 - dma_resv_lock(shadow->resv, NULL); 2312 2245 dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); 2313 - dma_resv_unlock(shadow->resv); 2314 2246 2315 2247 dma_fence_work_commit_imm(&pw->base); 2316 2248 return 0; 2317 2249 2318 - err_commit_unlock: 2319 - dma_resv_unlock(pw->batch->resv); 2320 2250 err_commit: 2321 2251 i915_sw_fence_set_error_once(&pw->base.chain, err); 2322 2252 dma_fence_work_commit_imm(&pw->base); ··· 2324 2274 static int eb_parse(struct i915_execbuffer *eb) 2325 2275 { 2326 2276 struct drm_i915_private *i915 = eb->i915; 2327 - struct intel_gt_buffer_pool_node *pool; 2277 + struct intel_gt_buffer_pool_node *pool = eb->batch_pool; 2328 2278 struct i915_vma *shadow, *trampoline; 2329 2279 unsigned int len; 2330 2280 int err; ··· 2347 2297 len += I915_CMD_PARSER_TRAMPOLINE_SIZE; 2348 2298 } 2349 2299 2350 - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); 2351 - if (IS_ERR(pool)) 
2352 - return PTR_ERR(pool); 2300 + if (!pool) { 2301 + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); 2302 + if (IS_ERR(pool)) 2303 + return PTR_ERR(pool); 2304 + eb->batch_pool = pool; 2305 + } 2306 + 2307 + err = i915_gem_object_lock(pool->obj, &eb->ww); 2308 + if (err) 2309 + goto err; 2353 2310 2354 2311 shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); 2355 2312 if (IS_ERR(shadow)) { ··· 2402 2345 err_shadow: 2403 2346 i915_vma_unpin(shadow); 2404 2347 err: 2405 - intel_gt_buffer_pool_put(pool); 2406 2348 return err; 2407 2349 } 2408 2350 ··· 3056 3000 eb.exec = exec; 3057 3001 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); 3058 3002 eb.vma[0].vma = NULL; 3003 + eb.reloc_pool = eb.batch_pool = NULL; 3059 3004 3060 3005 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 3061 3006 reloc_cache_init(&eb.reloc_cache, eb.i915); ··· 3129 3072 if (unlikely(err)) 3130 3073 goto err_context; 3131 3074 3075 + err = eb_lookup_vmas(&eb); 3076 + if (err) { 3077 + eb_release_vmas(&eb, true); 3078 + goto err_engine; 3079 + } 3080 + 3081 + i915_gem_ww_ctx_init(&eb.ww, true); 3082 + 3132 3083 err = eb_relocate_parse(&eb); 3133 3084 if (err) { 3134 3085 /* ··· 3149 3084 args->flags &= ~__EXEC_HAS_RELOC; 3150 3085 goto err_vma; 3151 3086 } 3087 + 3088 + ww_acquire_done(&eb.ww.ctx); 3152 3089 3153 3090 /* 3154 3091 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure ··· 3172 3105 vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0); 3173 3106 if (IS_ERR(vma)) { 3174 3107 err = PTR_ERR(vma); 3175 - goto err_parse; 3108 + goto err_vma; 3176 3109 } 3177 3110 3178 3111 batch = vma; ··· 3224 3157 * to explicitly hold another reference here. 
3225 3158 */ 3226 3159 eb.request->batch = batch; 3227 - if (batch->private) 3228 - intel_gt_buffer_pool_mark_active(batch->private, eb.request); 3160 + if (eb.batch_pool) 3161 + intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); 3229 3162 3230 3163 trace_i915_request_queue(eb.request, eb.batch_flags); 3231 3164 err = eb_submit(&eb, batch); ··· 3251 3184 err_batch_unpin: 3252 3185 if (eb.batch_flags & I915_DISPATCH_SECURE) 3253 3186 i915_vma_unpin(batch); 3254 - err_parse: 3255 - if (batch->private) 3256 - intel_gt_buffer_pool_put(batch->private); 3257 3187 err_vma: 3258 - if (eb.exec) 3259 - eb_release_vmas(&eb); 3188 + eb_release_vmas(&eb, true); 3260 3189 if (eb.trampoline) 3261 3190 i915_vma_unpin(eb.trampoline); 3191 + WARN_ON(err == -EDEADLK); 3192 + i915_gem_ww_ctx_fini(&eb.ww); 3193 + 3194 + if (eb.batch_pool) 3195 + intel_gt_buffer_pool_put(eb.batch_pool); 3196 + if (eb.reloc_pool) 3197 + intel_gt_buffer_pool_put(eb.reloc_pool); 3198 + err_engine: 3262 3199 eb_unpin_engine(&eb); 3263 3200 err_context: 3264 3201 i915_gem_context_put(eb.gem_context);
+38 -24
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
··· 32 32 if (IS_ERR(vma)) 33 33 return PTR_ERR(vma); 34 34 35 + err = i915_gem_object_lock(obj, &eb->ww); 36 + if (err) 37 + return err; 38 + 35 39 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); 36 40 if (err) 37 41 return err; 38 42 39 43 /* 8-Byte aligned */ 40 - if (!__reloc_entry_gpu(eb, vma, 41 - offsets[0] * sizeof(u32), 42 - 0)) { 43 - err = -EIO; 44 - goto unpin_vma; 45 - } 44 + err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); 45 + if (err <= 0) 46 + goto reloc_err; 46 47 47 48 /* !8-Byte aligned */ 48 - if (!__reloc_entry_gpu(eb, vma, 49 - offsets[1] * sizeof(u32), 50 - 1)) { 51 - err = -EIO; 52 - goto unpin_vma; 53 - } 49 + err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); 50 + if (err <= 0) 51 + goto reloc_err; 54 52 55 53 /* Skip to the end of the cmd page */ 56 54 i = PAGE_SIZE / sizeof(u32) - 1; ··· 58 60 eb->reloc_cache.rq_size += i; 59 61 60 62 /* Force next batch */ 61 - if (!__reloc_entry_gpu(eb, vma, 62 - offsets[2] * sizeof(u32), 63 - 2)) { 64 - err = -EIO; 65 - goto unpin_vma; 66 - } 63 + err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); 64 + if (err <= 0) 65 + goto reloc_err; 67 66 68 67 GEM_BUG_ON(!eb->reloc_cache.rq); 69 68 rq = i915_request_get(eb->reloc_cache.rq); 70 - reloc_gpu_flush(&eb->reloc_cache); 69 + reloc_gpu_flush(eb, &eb->reloc_cache); 71 70 GEM_BUG_ON(eb->reloc_cache.rq); 72 71 73 72 err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); ··· 96 101 unpin_vma: 97 102 i915_vma_unpin(vma); 98 103 return err; 104 + 105 + reloc_err: 106 + if (!err) 107 + err = -EIO; 108 + goto unpin_vma; 99 109 } 100 110 101 111 static int igt_gpu_reloc(void *arg) ··· 122 122 goto err_scratch; 123 123 } 124 124 125 + intel_gt_pm_get(&eb.i915->gt); 126 + 125 127 for_each_uabi_engine(eb.engine, eb.i915) { 126 128 reloc_cache_init(&eb.reloc_cache, eb.i915); 127 129 memset(map, POISON_INUSE, 4096); ··· 134 132 err = PTR_ERR(eb.context); 135 133 goto err_pm; 136 134 } 135 + eb.reloc_pool = NULL; 
137 136 137 + i915_gem_ww_ctx_init(&eb.ww, false); 138 + retry: 138 139 err = intel_context_pin(eb.context); 139 - if (err) 140 - goto err_put; 140 + if (!err) { 141 + err = __igt_gpu_reloc(&eb, scratch); 141 142 142 - err = __igt_gpu_reloc(&eb, scratch); 143 + intel_context_unpin(eb.context); 144 + } 145 + if (err == -EDEADLK) { 146 + err = i915_gem_ww_ctx_backoff(&eb.ww); 147 + if (!err) 148 + goto retry; 149 + } 150 + i915_gem_ww_ctx_fini(&eb.ww); 143 151 144 - intel_context_unpin(eb.context); 145 - err_put: 152 + if (eb.reloc_pool) 153 + intel_gt_buffer_pool_put(eb.reloc_pool); 154 + 146 155 intel_context_put(eb.context); 147 156 err_pm: 148 157 intel_engine_pm_put(eb.engine); ··· 164 151 if (igt_flush_test(eb.i915)) 165 152 err = -EIO; 166 153 154 + intel_gt_pm_put(&eb.i915->gt); 167 155 err_scratch: 168 156 i915_gem_object_put(scratch); 169 157 return err;
+6
drivers/gpu/drm/i915/i915_gem.c
··· 1360 1360 } 1361 1361 } 1362 1362 1363 + void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) 1364 + { 1365 + list_del(&obj->obj_link); 1366 + i915_gem_object_unlock(obj); 1367 + } 1368 + 1363 1369 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) 1364 1370 { 1365 1371 i915_gem_ww_ctx_unlock_all(ww);
+1
drivers/gpu/drm/i915/i915_gem.h
··· 126 126 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); 127 127 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); 128 128 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); 129 + void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); 129 130 130 131 #endif /* __I915_GEM_H__ */