Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-intel-gt-next-2025-09-01' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

Driver Changes:

- Apply multiple JSL/EHL/Gen7/Gen6 workarounds properly at context level (Sebastian)
- Protect against overflow in active_engine() (Krzysztof)
- Use try_cmpxchg64() in __active_lookup() (Uros)

- Enable GuC CT_DEAD output in regular debug builds (John)
- Static checker and style fixes (Sebastian)
- Selftest improvements (Krzysztof)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://lore.kernel.org/r/aLWZoEZVlBj2d8J9@jlahtine-mobl

+98 -98
+29 -28
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 1382 1382 */ 1383 1383 if (flushes & CLFLUSH_AFTER) 1384 1384 drm_clflush_virt_range(addr, sizeof(*addr)); 1385 - } else 1385 + } else { 1386 1386 *addr = value; 1387 + } 1387 1388 } 1388 1389 1389 1390 static u64 ··· 1568 1567 do { 1569 1568 u64 offset = eb_relocate_entry(eb, ev, r); 1570 1569 1571 - if (likely(offset == 0)) { 1572 - } else if ((s64)offset < 0) { 1570 + if (likely(offset == 0)) 1571 + continue; 1572 + 1573 + if ((s64)offset < 0) { 1573 1574 remain = (int)offset; 1574 1575 goto out; 1575 - } else { 1576 - /* 1577 - * Note that reporting an error now 1578 - * leaves everything in an inconsistent 1579 - * state as we have *already* changed 1580 - * the relocation value inside the 1581 - * object. As we have not changed the 1582 - * reloc.presumed_offset or will not 1583 - * change the execobject.offset, on the 1584 - * call we may not rewrite the value 1585 - * inside the object, leaving it 1586 - * dangling and causing a GPU hang. Unless 1587 - * userspace dynamically rebuilds the 1588 - * relocations on each execbuf rather than 1589 - * presume a static tree. 1590 - * 1591 - * We did previously check if the relocations 1592 - * were writable (access_ok), an error now 1593 - * would be a strange race with mprotect, 1594 - * having already demonstrated that we 1595 - * can read from this userspace address. 1596 - */ 1597 - offset = gen8_canonical_addr(offset & ~UPDATE); 1598 - __put_user(offset, 1599 - &urelocs[r - stack].presumed_offset); 1600 1576 } 1577 + /* 1578 + * Note that reporting an error now 1579 + * leaves everything in an inconsistent 1580 + * state as we have *already* changed 1581 + * the relocation value inside the 1582 + * object. As we have not changed the 1583 + * reloc.presumed_offset or will not 1584 + * change the execobject.offset, on the 1585 + * call we may not rewrite the value 1586 + * inside the object, leaving it 1587 + * dangling and causing a GPU hang. 
Unless 1588 + * userspace dynamically rebuilds the 1589 + * relocations on each execbuf rather than 1590 + * presume a static tree. 1591 + * 1592 + * We did previously check if the relocations 1593 + * were writable (access_ok), an error now 1594 + * would be a strange race with mprotect, 1595 + * having already demonstrated that we 1596 + * can read from this userspace address. 1597 + */ 1598 + offset = gen8_canonical_addr(offset & ~UPDATE); 1599 + __put_user(offset, &urelocs[r - stack].presumed_offset); 1601 1600 } while (r++, --count); 1602 1601 urelocs += ARRAY_SIZE(stack); 1603 1602 } while (remain);
+32 -38
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
··· 1096 1096 unsigned long addr, 1097 1097 bool unfaultable) 1098 1098 { 1099 - struct vm_area_struct *area; 1100 - int err = 0, i; 1099 + int i; 1101 1100 1102 1101 pr_info("igt_mmap(%s, %d) @ %lx\n", 1103 1102 obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr); 1104 - 1105 - mmap_read_lock(current->mm); 1106 - area = vma_lookup(current->mm, addr); 1107 - mmap_read_unlock(current->mm); 1108 - if (!area) { 1109 - pr_err("%s: Did not create a vm_area_struct for the mmap\n", 1110 - obj->mm.region->name); 1111 - err = -EINVAL; 1112 - goto out_unmap; 1113 - } 1114 1103 1115 1104 for (i = 0; i < obj->base.size / sizeof(u32); i++) { 1116 1105 u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); 1117 1106 u32 x; 1118 1107 1119 1108 if (get_user(x, ux)) { 1120 - err = -EFAULT; 1121 1109 if (!unfaultable) { 1122 1110 pr_err("%s: Unable to read from mmap, offset:%zd\n", 1123 1111 obj->mm.region->name, i * sizeof(x)); 1124 - goto out_unmap; 1112 + return -EFAULT; 1125 1113 } 1126 1114 1127 1115 continue; ··· 1118 1130 if (unfaultable) { 1119 1131 pr_err("%s: Faulted unmappable memory\n", 1120 1132 obj->mm.region->name); 1121 - err = -EINVAL; 1122 - goto out_unmap; 1133 + return -EINVAL; 1123 1134 } 1124 1135 1125 1136 if (x != expand32(POISON_INUSE)) { 1126 1137 pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", 1127 1138 obj->mm.region->name, 1128 1139 i * sizeof(x), x, expand32(POISON_INUSE)); 1129 - err = -EINVAL; 1130 - goto out_unmap; 1140 + return -EINVAL; 1131 1141 } 1132 1142 1133 1143 x = expand32(POISON_FREE); 1134 1144 if (put_user(x, ux)) { 1135 1145 pr_err("%s: Unable to write to mmap, offset:%zd\n", 1136 1146 obj->mm.region->name, i * sizeof(x)); 1137 - err = -EFAULT; 1138 - goto out_unmap; 1147 + return -EFAULT; 1139 1148 } 1140 1149 } 1141 1150 1142 - if (unfaultable) { 1143 - if (err == -EFAULT) 1144 - err = 0; 1145 - } else { 1146 - obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; 1147 - err = wc_check(obj); 1148 - } 1149 - 
out_unmap: 1150 - vm_munmap(addr, obj->base.size); 1151 - return err; 1151 + if (unfaultable) 1152 + return 0; 1153 + 1154 + obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; 1155 + return wc_check(obj); 1152 1156 } 1153 1157 1154 1158 #define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0) ··· 1156 1176 struct drm_i915_private *i915 = placements[0]->i915; 1157 1177 struct drm_i915_gem_object *obj; 1158 1178 struct i915_request *rq = NULL; 1179 + struct vm_area_struct *area; 1159 1180 unsigned long addr; 1160 1181 LIST_HEAD(objects); 1161 1182 u64 offset; ··· 1188 1207 goto out_put; 1189 1208 } 1190 1209 1210 + mmap_read_lock(current->mm); 1211 + area = vma_lookup(current->mm, addr); 1212 + mmap_read_unlock(current->mm); 1213 + if (!area) { 1214 + pr_err("%s: Did not create a vm_area_struct for the mmap\n", 1215 + obj->mm.region->name); 1216 + err = -EINVAL; 1217 + goto out_addr; 1218 + } 1219 + 1191 1220 if (flags & IGT_MMAP_MIGRATE_FILL) { 1192 1221 err = igt_fill_mappable(placements[0], &objects); 1193 1222 if (err) 1194 - goto out_put; 1223 + goto out_addr; 1195 1224 } 1196 1225 1197 1226 err = i915_gem_object_lock(obj, NULL); 1198 1227 if (err) 1199 - goto out_put; 1228 + goto out_addr; 1200 1229 1201 1230 err = i915_gem_object_pin_pages(obj); 1202 1231 if (err) { 1203 1232 i915_gem_object_unlock(obj); 1204 - goto out_put; 1233 + goto out_addr; 1205 1234 } 1206 1235 1207 1236 err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, ··· 1219 1228 i915_gem_object_is_lmem(obj), 1220 1229 expand32(POISON_INUSE), &rq); 1221 1230 i915_gem_object_unpin_pages(obj); 1222 - if (rq) { 1231 + if (rq && !err) { 1223 1232 err = dma_resv_reserve_fences(obj->base.resv, 1); 1224 1233 if (!err) 1225 1234 dma_resv_add_fence(obj->base.resv, &rq->fence, ··· 1228 1237 } 1229 1238 i915_gem_object_unlock(obj); 1230 1239 if (err) 1231 - goto out_put; 1240 + goto out_addr; 1232 1241 1233 1242 if (flags & IGT_MMAP_MIGRATE_EVICTABLE) 1234 1243 igt_make_evictable(&objects); ··· 1236 1245 if (flags & 
IGT_MMAP_MIGRATE_FAIL_GPU) { 1237 1246 err = i915_gem_object_lock(obj, NULL); 1238 1247 if (err) 1239 - goto out_put; 1248 + goto out_addr; 1240 1249 1241 1250 /* 1242 - * Ensure we only simulate the gpu failuire when faulting the 1251 + * Ensure we only simulate the gpu failure when faulting the 1243 1252 * pages. 1244 1253 */ 1245 1254 err = i915_gem_object_wait_moving_fence(obj, true); 1246 1255 i915_gem_object_unlock(obj); 1247 1256 if (err) 1248 - goto out_put; 1257 + goto out_addr; 1249 1258 i915_ttm_migrate_set_failure_modes(true, false); 1250 1259 } 1251 1260 ··· 1288 1297 err = -EINVAL; 1289 1298 } 1290 1299 } 1300 + 1301 + out_addr: 1302 + vm_munmap(addr, obj->base.size); 1291 1303 1292 1304 out_put: 1293 1305 i915_gem_object_put(obj);
+25 -24
drivers/gpu/drm/i915/gt/intel_workarounds.c
··· 337 337 struct i915_wa_list *wal) 338 338 { 339 339 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); 340 + 341 + /* WaDisable_RenderCache_OperationalFlush:snb */ 342 + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 340 343 } 341 344 342 345 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, 343 346 struct i915_wa_list *wal) 344 347 { 345 348 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); 349 + /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ 350 + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 351 + 352 + /* 353 + * BSpec says this must be set, even though 354 + * WaDisable4x2SubspanOptimization:ivb,hsw 355 + * WaDisable4x2SubspanOptimization isn't listed for VLV. 356 + */ 357 + wa_masked_en(wal, 358 + CACHE_MODE_1, 359 + PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 346 360 } 347 361 348 362 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, ··· 648 634 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 649 635 struct i915_wa_list *wal) 650 636 { 637 + struct drm_i915_private *i915 = engine->i915; 638 + 651 639 /* Wa_1406697149 (WaDisableBankHangMode:icl) */ 652 640 wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); 653 641 ··· 685 669 686 670 /* Wa_1406306137:icl,ehl */ 687 671 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); 672 + 673 + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { 674 + /* 675 + * Disable Repacking for Compression (masked R/W access) 676 + * before rendering compressed surfaces for display. 677 + */ 678 + wa_masked_en(wal, CACHE_MODE_0_GEN7, 679 + DISABLE_REPACKING_FOR_COMPRESSION); 680 + } 688 681 } 689 682 690 683 /* ··· 2331 2306 GEN8_RC_SEMA_IDLE_MSG_DISABLE); 2332 2307 } 2333 2308 2334 - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { 2335 - /* 2336 - * "Disable Repacking for Compression (masked R/W access) 2337 - * before rendering compressed surfaces for display." 
2338 - */ 2339 - wa_masked_en(wal, CACHE_MODE_0_GEN7, 2340 - DISABLE_REPACKING_FOR_COMPRESSION); 2341 - } 2342 - 2343 2309 if (GRAPHICS_VER(i915) == 11) { 2344 2310 /* This is not an Wa. Enable for better image quality */ 2345 2311 wa_masked_en(wal, ··· 2581 2565 RING_MODE_GEN7(RENDER_RING_BASE), 2582 2566 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); 2583 2567 2584 - /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ 2585 - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 2586 - 2587 - /* 2588 - * BSpec says this must be set, even though 2589 - * WaDisable4x2SubspanOptimization:ivb,hsw 2590 - * WaDisable4x2SubspanOptimization isn't listed for VLV. 2591 - */ 2592 - wa_masked_en(wal, 2593 - CACHE_MODE_1, 2594 - PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 2595 - 2596 2568 /* 2597 2569 * BSpec recommends 8x4 when MSAA is used, 2598 2570 * however in practice 16x4 seems fastest. ··· 2646 2642 GEN6_GT_MODE, 2647 2643 GEN6_WIZ_HASHING_MASK, 2648 2644 GEN6_WIZ_HASHING_16x4); 2649 - 2650 - /* WaDisable_RenderCache_OperationalFlush:snb */ 2651 - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 2652 2645 2653 2646 /* 2654 2647 * From the Sandybridge PRM, volume 1 part 3, page 24:
+2
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
··· 904 904 arg->result = PTR_ERR(ce[count]); 905 905 pr_err("[%s] Create context #%ld failed: %d!\n", 906 906 engine->name, count, arg->result); 907 + if (!count) 908 + return; 907 909 while (--count) 908 910 intel_context_put(ce[count]); 909 911 return;
+7 -4
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
··· 13 13 #include "intel_guc_ct.h" 14 14 #include "intel_guc_print.h" 15 15 16 - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) 16 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 17 17 enum { 18 18 CT_DEAD_ALIVE = 0, 19 19 CT_DEAD_SETUP, ··· 144 144 spin_lock_init(&ct->requests.lock); 145 145 INIT_LIST_HEAD(&ct->requests.pending); 146 146 INIT_LIST_HEAD(&ct->requests.incoming); 147 - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) 147 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 148 148 INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); 149 149 #endif 150 150 INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); ··· 373 373 374 374 ct->enabled = true; 375 375 ct->stall_time = KTIME_MAX; 376 - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) 376 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 377 377 ct->dead_ct_reported = false; 378 378 ct->dead_ct_reason = CT_DEAD_ALIVE; 379 379 #endif ··· 1377 1377 ct->ctbs.recv.desc->tail); 1378 1378 } 1379 1379 1380 - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) 1380 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 1381 1381 static void ct_dead_ct_worker_func(struct work_struct *w) 1382 1382 { 1383 1383 struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); 1384 1384 struct intel_guc *guc = ct_to_guc(ct); 1385 1385 1386 1386 if (ct->dead_ct_reported) 1387 + return; 1388 + 1389 + if (i915_error_injected()) 1387 1390 return; 1388 1391 1389 1392 ct->dead_ct_reported = true;
+1 -1
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
··· 97 97 /** @stall_time: time of first time a CTB submission is stalled */ 98 98 ktime_t stall_time; 99 99 100 - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) 100 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 101 101 int dead_ct_reason; 102 102 bool dead_ct_reported; 103 103 struct work_struct dead_ct_worker;
+2 -3
drivers/gpu/drm/i915/i915_active.c
··· 257 257 * claimed the cache and we know that is does not match our 258 258 * idx. If, and only if, the timeline is currently zero is it 259 259 * worth competing to claim it atomically for ourselves (for 260 - * only the winner of that race will cmpxchg return the old 261 - * value of 0). 260 + * only the winner of that race will cmpxchg succeed). 262 261 */ 263 - if (!cached && !cmpxchg64(&it->timeline, 0, idx)) 262 + if (!cached && try_cmpxchg64(&it->timeline, &cached, idx)) 264 263 return it; 265 264 } 266 265