Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-xe-next-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- Define and parse OA sync properties (Ashutosh)

Driver Changes:
- Add caller info to xe_gt_reset_async (Nirmoy)
- A large forcewake rework / cleanup (Himal)
- A g2h response timeout fix (Badal)
- A PTL workaround (Vinay)
- Handle unreliable MMIO reads during forcewake (Shuicheng)
- Ufence user-space access fixes (Nirmoy)
- Annotate flexible arrays (Matthew Brost)
- Enable GuC lite restore (Fei)
- Prevent GuC register capture on VF (Zhanjun)
- Show VFs VRAM / LMEM provisioning summary over debugfs (Michal)
- Parallel queues fix on GT reset (Nirmoy)
- Move reference grabbing to a job's dma-fence (Matt Brost)
- Mark a number of local workqueues WQ_MEM_RECLAIM (Matt Brost)
- OA synchronization support (Ashutosh)
- Capture all available bits of GuC timestamp to GuC log (John)
- Increase readability of guc_info debugfs (John)
- Add a mmio barrier before GGTT invalidate (Matt Brost)
- Don't short-circuit TDR on jobs not started (Matt Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZyNvA_vZZYR-1eWE@fedora

+1012 -518
+1
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
··· 352 352 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, 353 353 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, 354 354 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, 355 + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, 355 356 }; 356 357 357 358 #endif
+4 -2
drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
··· 42 42 struct xe_gt *gt = tile->media_gt; 43 43 struct xe_gsc *gsc = &gt->uc.gsc; 44 44 bool ret = true; 45 + unsigned int fw_ref; 45 46 46 47 if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) { 47 48 drm_dbg_kms(&xe->drm, ··· 51 50 } 52 51 53 52 xe_pm_runtime_get(xe); 54 - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { 53 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 54 + if (!fw_ref) { 55 55 drm_dbg_kms(&xe->drm, 56 56 "failed to get forcewake to check proxy status\n"); 57 57 ret = false; ··· 62 60 if (!xe_gsc_proxy_init_done(gsc)) 63 61 ret = false; 64 62 65 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 63 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 66 64 out: 67 65 xe_pm_runtime_put(xe); 68 66 return ret;
+2 -1
drivers/gpu/drm/xe/regs/xe_guc_regs.h
··· 84 84 #define HUC_LOADING_AGENT_GUC REG_BIT(1) 85 85 #define GUC_WOPCM_OFFSET_VALID REG_BIT(0) 86 86 #define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) 87 - #define GUC_PMTIMESTAMP XE_REG(0xc3e8) 87 + #define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8) 88 + #define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec) 88 89 89 90 #define GUC_SEND_INTERRUPT XE_REG(0xc4c8) 90 91 #define GUC_SEND_TRIGGER REG_BIT(0)
+8 -10
drivers/gpu/drm/xe/tests/xe_mocs.c
··· 43 43 { 44 44 struct kunit *test = kunit_get_current_test(); 45 45 u32 l3cc, l3cc_expected; 46 - unsigned int i; 46 + unsigned int fw_ref, i; 47 47 u32 reg_val; 48 - u32 ret; 49 48 50 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 51 - KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); 49 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 50 + KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); 52 51 53 52 for (i = 0; i < info->num_mocs_regs; i++) { 54 53 if (!(i & 1)) { ··· 71 72 KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, 72 73 "l3cc idx=%u has incorrect val.\n", i); 73 74 } 74 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 75 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 75 76 } 76 77 77 78 static void read_mocs_table(struct xe_gt *gt, ··· 79 80 { 80 81 struct kunit *test = kunit_get_current_test(); 81 82 u32 mocs, mocs_expected; 82 - unsigned int i; 83 + unsigned int fw_ref, i; 83 84 u32 reg_val; 84 - u32 ret; 85 85 86 86 KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, 87 87 "Unused entries index should have been defined\n"); 88 88 89 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 90 - KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); 89 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 90 + KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); 91 91 92 92 for (i = 0; i < info->num_mocs_regs; i++) { 93 93 if (regs_are_mcr(gt)) ··· 104 106 "mocs reg 0x%x has incorrect val.\n", i); 105 107 } 106 108 107 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 109 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 108 110 } 109 111 110 112 static int mocs_kernel_test_run_device(struct xe_device *xe)
+23 -4
drivers/gpu/drm/xe/xe_debugfs.c
··· 90 90 { 91 91 struct xe_device *xe = inode->i_private; 92 92 struct xe_gt *gt; 93 - u8 id; 93 + u8 id, last_gt; 94 + unsigned int fw_ref; 94 95 95 96 xe_pm_runtime_get(xe); 96 - for_each_gt(gt, xe, id) 97 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 97 + for_each_gt(gt, xe, id) { 98 + last_gt = id; 99 + 100 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 101 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 102 + goto err_fw_get; 103 + } 98 104 99 105 return 0; 106 + 107 + err_fw_get: 108 + for_each_gt(gt, xe, id) { 109 + if (id < last_gt) 110 + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 111 + else if (id == last_gt) 112 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 113 + else 114 + break; 115 + } 116 + 117 + xe_pm_runtime_put(xe); 118 + return -ETIMEDOUT; 100 119 } 101 120 102 121 static int forcewake_release(struct inode *inode, struct file *file) ··· 125 106 u8 id; 126 107 127 108 for_each_gt(gt, xe, id) 128 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 109 + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 129 110 xe_pm_runtime_put(xe); 130 111 131 112 return 0;
+9 -7
drivers/gpu/drm/xe/xe_devcoredump.c
··· 158 158 { 159 159 struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); 160 160 struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); 161 + unsigned int fw_ref; 161 162 162 163 /* keep going if fw fails as we still want to save the memory and SW data */ 163 - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) 164 + fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); 165 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 164 166 xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); 165 167 xe_vm_snapshot_capture_delayed(ss->vm); 166 168 xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); 167 - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); 169 + xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); 168 170 169 171 /* Calculate devcoredump size */ 170 172 ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); ··· 238 236 u32 width_mask = (0x1 << q->width) - 1; 239 237 const char *process_name = "no process"; 240 238 241 - int i; 239 + unsigned int fw_ref; 242 240 bool cookie; 241 + int i; 243 242 244 243 ss->snapshot_time = ktime_get_real(); 245 244 ss->boot_time = ktime_get_boottime(); ··· 264 261 } 265 262 266 263 /* keep going if fw fails as we still want to save the memory and SW data */ 267 - if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) 268 - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); 264 + fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 269 265 270 266 ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); 271 - ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); 267 + ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); 272 268 ss->ge = xe_guc_exec_queue_snapshot_capture(q); 273 269 ss->job = xe_sched_job_snapshot_capture(job); 274 270 ss->vm = xe_vm_snapshot_capture(q->vm); ··· 276 274 277 275 queue_work(system_unbound_wq, &ss->work); 278 276 279 - xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 277 + xe_force_wake_put(gt_to_fw(q->gt), fw_ref); 280 278 dma_fence_end_signalling(cookie); 281 279 282 280
+14 -11
drivers/gpu/drm/xe/xe_device.c
··· 604 604 static int probe_has_flat_ccs(struct xe_device *xe) 605 605 { 606 606 struct xe_gt *gt; 607 + unsigned int fw_ref; 607 608 u32 reg; 608 - int err; 609 609 610 610 /* Always enabled/disabled, no runtime check to do */ 611 611 if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) ··· 613 613 614 614 gt = xe_root_mmio_gt(xe); 615 615 616 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 617 - if (err) 618 - return err; 616 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 617 + if (!fw_ref) 618 + return -ETIMEDOUT; 619 619 620 620 reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); 621 621 xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE); ··· 624 624 drm_dbg(&xe->drm, 625 625 "Flat CCS has been disabled in bios, May lead to performance impact"); 626 626 627 - return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 627 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 628 + return 0; 628 629 } 629 630 630 631 int xe_device_probe(struct xe_device *xe) ··· 876 875 void xe_device_td_flush(struct xe_device *xe) 877 876 { 878 877 struct xe_gt *gt; 878 + unsigned int fw_ref; 879 879 u8 id; 880 880 881 881 if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) ··· 891 889 if (xe_gt_is_media_type(gt)) 892 890 continue; 893 891 894 - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) 892 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 893 + if (!fw_ref) 895 894 return; 896 895 897 896 xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); ··· 907 904 150, NULL, false)) 908 905 xe_gt_err_once(gt, "TD flush timeout\n"); 909 906 910 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 907 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 911 908 } 912 909 } 913 910 914 911 void xe_device_l2_flush(struct xe_device *xe) 915 912 { 916 913 struct xe_gt *gt; 917 - int err; 914 + unsigned int fw_ref; 918 915 919 916 gt = xe_root_mmio_gt(xe); 920 917 921 918 if (!XE_WA(gt, 16023588340)) 922 919 return; 923 920 924 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 925 - if (err) 921 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 922 + if (!fw_ref) 926 923 return; 927 924 928 925 spin_lock(&gt->global_invl_lock); ··· 932 929 xe_gt_err_once(gt, "Global invalidation timeout\n"); 933 930 spin_unlock(&gt->global_invl_lock); 934 931 935 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 932 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 936 933 } 937 934 938 935 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+6 -2
drivers/gpu/drm/xe/xe_drm_client.c
··· 278 278 struct xe_hw_engine *hwe; 279 279 struct xe_exec_queue *q; 280 280 u64 gpu_timestamp; 281 + unsigned int fw_ref; 281 282 282 283 xe_pm_runtime_get(xe); 283 284 ··· 304 303 continue; 305 304 306 305 fw = xe_hw_engine_to_fw_domain(hwe); 307 - if (xe_force_wake_get(gt_to_fw(gt), fw)) { 306 + 307 + fw_ref = xe_force_wake_get(gt_to_fw(gt), fw); 308 + if (!xe_force_wake_ref_has_domain(fw_ref, fw)) { 308 309 hwe = NULL; 310 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 309 311 break; 310 312 } 311 313 312 314 gpu_timestamp = xe_hw_engine_read_timestamp(hwe); 313 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw)); 315 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 314 316 break; 315 317 } 316 318
+1 -1
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 143 143 /** @hw_engine_group_link: link into exec queues in the same hw engine group */ 144 144 struct list_head hw_engine_group_link; 145 145 /** @lrc: logical ring context for this exec queue */ 146 - struct xe_lrc *lrc[]; 146 + struct xe_lrc *lrc[] __counted_by(width); 147 147 }; 148 148 149 149 /**
+1 -1
drivers/gpu/drm/xe/xe_execlist.c
··· 313 313 q->ring_ops->emit_job(job); 314 314 xe_execlist_make_active(exl); 315 315 316 - return dma_fence_get(job->fence); 316 + return job->fence; 317 317 } 318 318 319 319 static void execlist_job_free(struct drm_sched_job *drm_job)
+91 -31
drivers/gpu/drm/xe/xe_force_wake.c
··· 21 21 return wake ? "wake" : "sleep"; 22 22 } 23 23 24 - static void domain_init(struct xe_force_wake_domain *domain, 24 + static void mark_domain_initialized(struct xe_force_wake *fw, 25 + enum xe_force_wake_domain_id id) 26 + { 27 + fw->initialized_domains |= BIT(id); 28 + } 29 + 30 + static void init_domain(struct xe_force_wake *fw, 25 31 enum xe_force_wake_domain_id id, 26 32 struct xe_reg reg, struct xe_reg ack) 27 33 { 34 + struct xe_force_wake_domain *domain = &fw->domains[id]; 35 + 28 36 domain->id = id; 29 37 domain->reg_ctl = reg; 30 38 domain->reg_ack = ack; 31 39 domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL); 32 40 domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL); 41 + 42 + mark_domain_initialized(fw, id); 33 43 } 34 44 35 45 void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) ··· 53 43 xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); 54 44 55 45 if (xe->info.graphics_verx100 >= 1270) { 56 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 57 - XE_FW_DOMAIN_ID_GT, 46 + init_domain(fw, XE_FW_DOMAIN_ID_GT, 58 47 FORCEWAKE_GT, 59 48 FORCEWAKE_ACK_GT_MTL); 60 49 } else { 61 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 62 - XE_FW_DOMAIN_ID_GT, 50 + init_domain(fw, XE_FW_DOMAIN_ID_GT, 63 51 FORCEWAKE_GT, 64 52 FORCEWAKE_ACK_GT); 65 53 } ··· 71 63 xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); 72 64 73 65 if (!xe_gt_is_media_type(gt)) 74 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], 75 - XE_FW_DOMAIN_ID_RENDER, 66 + init_domain(fw, XE_FW_DOMAIN_ID_RENDER, 76 67 FORCEWAKE_RENDER, 77 68 FORCEWAKE_ACK_RENDER); 78 69 ··· 79 72 if (!(gt->info.engine_mask & BIT(i))) 80 73 continue; 81 74 82 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], 83 - XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, 75 + init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, 84 76 FORCEWAKE_MEDIA_VDBOX(j), 85 77 FORCEWAKE_ACK_MEDIA_VDBOX(j)); 86 78 } ··· 88 82 if (!(gt->info.engine_mask & BIT(i))) 89 83 continue; 90 84 91 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], 92 - XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, 85 + init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, 93 86 FORCEWAKE_MEDIA_VEBOX(j), 94 87 FORCEWAKE_ACK_MEDIA_VEBOX(j)); 95 88 } 96 89 97 90 if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) 98 - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], 99 - XE_FW_DOMAIN_ID_GSC, 91 + init_domain(fw, XE_FW_DOMAIN_ID_GSC, 100 92 FORCEWAKE_GSC, 101 93 FORCEWAKE_ACK_GSC); 102 94 } ··· 160 156 (ffs(tmp__) - 1))) && \ 161 157 domain__->reg_ctl.addr) 162 158 163 - int xe_force_wake_get(struct xe_force_wake *fw, 164 - enum xe_force_wake_domains domains) 159 + /** 160 + * xe_force_wake_get() : Increase the domain refcount 161 + * @fw: struct xe_force_wake 162 + * @domains: forcewake domains to get refcount on 163 + * 164 + * This function wakes up @domains if they are asleep and takes references. 165 + * If requested domain is XE_FORCEWAKE_ALL then only applicable/initialized 166 + * domains will be considered for refcount and it is a caller responsibility 167 + * to check returned ref if it includes any specific domain by using 168 + * xe_force_wake_ref_has_domain() function. Caller must call 169 + * xe_force_wake_put() function to decrease incremented refcounts. 170 + * 171 + * Return: opaque reference to woken domains or zero if none of requested 172 + * domains were awake. 173 + */ 174 + unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw, 175 + enum xe_force_wake_domains domains) 165 176 { 166 177 struct xe_gt *gt = fw->gt; 167 178 struct xe_force_wake_domain *domain; 168 - enum xe_force_wake_domains tmp, woken = 0; 179 + unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0; 180 + unsigned int tmp, ref_rqst; 169 181 unsigned long flags; 170 - int ret = 0; 171 182 183 + xe_gt_assert(gt, is_power_of_2(domains)); 184 + xe_gt_assert(gt, domains <= XE_FORCEWAKE_ALL); 185 + xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL || fw->initialized_domains & domains); 186 + 187 + ref_rqst = (domains == XE_FORCEWAKE_ALL) ? fw->initialized_domains : domains; 172 188 spin_lock_irqsave(&fw->lock, flags); 173 - for_each_fw_domain_masked(domain, domains, fw, tmp) { 189 + for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) { 174 190 if (!domain->ref++) { 175 - woken |= BIT(domain->id); 191 + awake_rqst |= BIT(domain->id); 176 192 domain_wake(gt, domain); 177 193 } 194 + ref_incr |= BIT(domain->id); 178 195 } 179 - for_each_fw_domain_masked(domain, woken, fw, tmp) { 180 - ret |= domain_wake_wait(gt, domain); 196 + for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) { 197 + if (domain_wake_wait(gt, domain) == 0) { 198 + fw->awake_domains |= BIT(domain->id); 199 + } else { 200 + awake_failed |= BIT(domain->id); 201 + --domain->ref; 202 + } 181 203 } 182 - fw->awake_domains |= woken; 204 + ref_incr &= ~awake_failed; 183 205 spin_unlock_irqrestore(&fw->lock, flags); 184 206 185 - return ret; 207 + xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n", 208 + str_plural(hweight_long(awake_failed)), awake_failed); 209 + 210 + if (domains == XE_FORCEWAKE_ALL && ref_incr == fw->initialized_domains) 211 + ref_incr |= XE_FORCEWAKE_ALL; 212 + 213 + return ref_incr; 186 214 } 187 215 188 - int xe_force_wake_put(struct xe_force_wake *fw, 189 - enum xe_force_wake_domains domains) 216 + /** 217 + * xe_force_wake_put - Decrement the refcount and put domain to sleep if refcount becomes 0 218 + * @fw: Pointer to the force wake structure 219 + * @fw_ref: return of xe_force_wake_get() 220 + * 221 + * This function reduces the reference counts for domains in fw_ref. If 222 + * refcount for any of the specified domain reaches 0, it puts the domain to sleep 223 + * and waits for acknowledgment for domain to sleep within 50 milisec timeout. 224 + * Warns in case of timeout of ack from domain. 225 + */ 226 + void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref) 190 227 { 191 228 struct xe_gt *gt = fw->gt; 192 229 struct xe_force_wake_domain *domain; 193 - enum xe_force_wake_domains tmp, sleep = 0; 230 + unsigned int tmp, sleep = 0; 194 231 unsigned long flags; 195 - int ret = 0; 232 + int ack_fail = 0; 233 + 234 + /* 235 + * Avoid unnecessary lock and unlock when the function is called 236 + * in error path of individual domains. 237 + */ 238 + if (!fw_ref) 239 + return; 240 + 241 + if (xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 242 + fw_ref = fw->initialized_domains; 196 243 197 244 spin_lock_irqsave(&fw->lock, flags); 198 - for_each_fw_domain_masked(domain, domains, fw, tmp) { 245 + for_each_fw_domain_masked(domain, fw_ref, fw, tmp) { 246 + xe_gt_assert(gt, domain->ref); 247 + 199 248 if (!--domain->ref) { 200 249 sleep |= BIT(domain->id); 201 250 domain_sleep(gt, domain); 202 251 } 203 252 } 204 253 for_each_fw_domain_masked(domain, sleep, fw, tmp) { 205 - ret |= domain_sleep_wait(gt, domain); 254 + if (domain_sleep_wait(gt, domain) == 0) 255 + fw->awake_domains &= ~BIT(domain->id); 256 + else 257 + ack_fail |= BIT(domain->id); 206 258 } 207 - fw->awake_domains &= ~sleep; 208 259 spin_unlock_irqrestore(&fw->lock, flags); 209 260 210 - return ret; 261 + xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n", 262 + str_plural(hweight_long(ack_fail)), ack_fail); 211 263 }
+19 -4
drivers/gpu/drm/xe/xe_force_wake.h
··· 15 15 struct xe_force_wake *fw); 16 16 void xe_force_wake_init_engines(struct xe_gt *gt, 17 17 struct xe_force_wake *fw); 18 - int xe_force_wake_get(struct xe_force_wake *fw, 19 - enum xe_force_wake_domains domains); 20 - int xe_force_wake_put(struct xe_force_wake *fw, 21 - enum xe_force_wake_domains domains); 18 + unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw, 19 + enum xe_force_wake_domains domains); 20 + void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref); 22 21 23 22 static inline int 24 23 xe_force_wake_ref(struct xe_force_wake *fw, ··· 43 44 { 44 45 xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); 45 46 xe_gt_assert(fw->gt, fw->awake_domains & domain); 47 + } 48 + 49 + /** 50 + * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref 51 + * @fw_ref : the force_wake reference 52 + * @domain : forcewake domain to verify 53 + * 54 + * This function confirms whether the @fw_ref includes a reference to the 55 + * specified @domain. 56 + * 57 + * Return: true if domain is refcounted. 58 + */ 59 + static inline bool 60 + xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain) 61 + { 62 + return fw_ref & domain; 46 63 } 47 64 48 65 #endif
+4 -2
drivers/gpu/drm/xe/xe_force_wake_types.h
··· 48 48 XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), 49 49 XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), 50 50 XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), 51 - XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 51 + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) 52 52 }; 53 53 54 54 /** ··· 78 78 /** @lock: protects everything force wake struct */ 79 79 spinlock_t lock; 80 80 /** @awake_domains: mask of all domains awake */ 81 - enum xe_force_wake_domains awake_domains; 81 + unsigned int awake_domains; 82 + /** @initialized_domains: mask of all initialized domains */ 83 + unsigned int initialized_domains; 82 84 /** @domains: force wake domains */ 83 85 struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT]; 84 86 };
+2 -2
drivers/gpu/drm/xe/xe_ggtt.c
··· 246 246 else 247 247 ggtt->pt_ops = &xelp_pt_ops; 248 248 249 - ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); 249 + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); 250 250 251 251 drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), 252 252 ggtt->size - xe_wopcm_size(xe)); ··· 409 409 * vs. correct GGTT page. Not particularly a hot code path so blindly 410 410 * do a mmio read here which results in GuC reading correct GGTT page. 411 411 */ 412 - xe_mmio_read32(&xe_root_mmio_gt(xe)->mmio, VF_CAP_REG); 412 + xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG); 413 413 414 414 /* Each GT in a tile has its own TLB to cache GGTT lookups */ 415 415 ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
+11 -12
drivers/gpu/drm/xe/xe_gsc.c
··· 261 261 { 262 262 struct xe_gt *gt = gsc_to_gt(gsc); 263 263 struct xe_tile *tile = gt_to_tile(gt); 264 + unsigned int fw_ref; 264 265 int ret; 265 266 266 267 if (XE_WA(tile->primary_gt, 14018094691)) { 267 - ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 268 + fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 268 269 269 270 /* 270 271 * If the forcewake fails we want to keep going, because the worst 271 272 * case outcome in failing to apply the WA is that PXP won't work, 272 - * which is not fatal. We still throw a warning so the issue is 273 - * seen if it happens. 273 + * which is not fatal. Forcewake get warns implicitly in case of failure 274 274 */ 275 - xe_gt_WARN_ON(tile->primary_gt, ret); 276 - 277 275 xe_gt_mcr_multicast_write(tile->primary_gt, 278 276 EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, 279 277 EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); ··· 280 282 ret = gsc_upload(gsc); 281 283 282 284 if (XE_WA(tile->primary_gt, 14018094691)) 283 - xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 285 + xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); 284 286 285 287 if (ret) 286 288 return ret; ··· 350 352 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); 351 353 struct xe_gt *gt = gsc_to_gt(gsc); 352 354 struct xe_device *xe = gt_to_xe(gt); 355 + unsigned int fw_ref; 353 356 u32 actions; 354 357 int ret; 355 358 ··· 360 361 spin_unlock_irq(&gsc->lock); 361 362 362 363 xe_pm_runtime_get(xe); 363 - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); 364 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 364 365 365 366 if (actions & GSC_ACTION_ER_COMPLETE) { 366 367 ret = gsc_er_complete(gt); ··· 380 381 xe_gsc_proxy_request_handler(gsc); 381 382 382 383 out: 383 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 384 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 384 385 xe_pm_runtime_put(xe); 385 386 } 386 387 ··· 600 601 { 601 602 struct xe_gt *gt = gsc_to_gt(gsc); 602 struct xe_mmio *mmio = &gt->mmio; 603 - int err; 604 + unsigned int fw_ref; 604 605 605 606 xe_uc_fw_print(&gsc->fw, p); 606 607 ··· 609 610 if (!xe_uc_fw_is_enabled(&gsc->fw)) 610 611 return; 611 612 612 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 613 - if (err) 613 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 614 + if (!fw_ref) 614 615 return; 615 616 616 617 drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", ··· 621 622 xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), 622 623 xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); 623 624 624 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 625 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 625 626 }
+4 -5
drivers/gpu/drm/xe/xe_gsc_proxy.c
··· 450 450 { 451 451 struct xe_gt *gt = gsc_to_gt(gsc); 452 452 struct xe_device *xe = gt_to_xe(gt); 453 - int err = 0; 453 + unsigned int fw_ref = 0; 454 454 455 455 if (!gsc->proxy.component_added) 456 456 return; 457 457 458 458 /* disable HECI2 IRQs */ 459 459 xe_pm_runtime_get(xe); 460 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 461 - if (err) 460 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 461 + if (!fw_ref) 462 462 xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); 463 463 464 464 /* try do disable irq even if forcewake failed */ 465 465 gsc_proxy_irq_toggle(gsc, false); 466 466 467 - if (!err) 468 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 467 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 469 468 xe_pm_runtime_put(xe); 470 469 471 470 xe_gsc_wait_for_worker_completion(gsc);
+61 -49
drivers/gpu/drm/xe/xe_gt.c
··· 77 77 return ERR_PTR(-ENOMEM); 78 78 79 79 gt->tile = tile; 80 - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); 80 + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 81 + WQ_MEM_RECLAIM); 81 82 82 83 err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt); 83 84 if (err) ··· 98 97 99 98 static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) 100 99 { 100 + unsigned int fw_ref; 101 101 u32 reg; 102 - int err; 103 102 104 103 if (!XE_WA(gt, 16023588340)) 105 104 return; 106 105 107 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 108 - if (WARN_ON(err)) 106 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 107 + if (!fw_ref) 109 108 return; 110 109 111 110 if (!xe_gt_is_media_type(gt)) { ··· 115 114 } 116 115 117 116 xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); 118 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 117 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 119 118 } 120 119 121 120 static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) 122 121 { 122 + unsigned int fw_ref; 123 123 u32 reg; 124 - int err; 125 124 126 125 if (!XE_WA(gt, 16023588340)) 127 126 return; ··· 129 128 if (xe_gt_is_media_type(gt)) 130 129 return; 131 130 132 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 133 - if (WARN_ON(err)) 131 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 132 + if (!fw_ref) 134 133 return; 135 134 136 135 reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); 137 136 reg &= ~CG_DIS_CNTLBUS; 138 137 xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); 139 138 140 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 139 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 141 140 } 142 141 143 142 /** ··· 403 402 404 403 static int gt_fw_domain_init(struct xe_gt *gt) 405 404 { 405 + unsigned int fw_ref; 406 406 int err, i; 407 407 408 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 409 - if (err) 408 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 409 + if (!fw_ref) { 410 + err = -ETIMEDOUT; 410 411 goto 
err_hw_fence_irq; 412 + } 411 413 412 414 if (!xe_gt_is_media_type(gt)) { 413 415 err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); ··· 445 441 */ 446 442 gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID); 447 443 448 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 449 - XE_WARN_ON(err); 450 - 444 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 451 445 return 0; 452 446 453 447 err_force_wake: 454 448 dump_pat_on_error(gt); 455 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 449 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 456 450 err_hw_fence_irq: 457 451 for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 458 452 xe_hw_fence_irq_finish(&gt->fence_irq[i]); ··· 460 458 461 459 static int all_fw_domain_init(struct xe_gt *gt) 462 460 { 461 + unsigned int fw_ref; 463 462 int err, i; 464 463 465 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 466 - if (err) 467 - goto err_hw_fence_irq; 464 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 465 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 466 + err = -ETIMEDOUT; 467 + goto err_force_wake; 468 + } 468 469 469 470 xe_gt_mcr_set_implicit_defaults(gt); 470 471 xe_reg_sr_apply_mmio(&gt->reg_sr, gt); ··· 531 526 if (IS_SRIOV_PF(gt_to_xe(gt))) 532 527 xe_gt_sriov_pf_init_hw(gt); 533 528 534 - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 535 - XE_WARN_ON(err); 529 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 536 530 537 531 return 0; 538 532 539 533 err_force_wake: 540 - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 541 - err_hw_fence_irq: 534 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 542 535 for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 543 536 xe_hw_fence_irq_finish(&gt->fence_irq[i]); 544 537 ··· 549 546 */ 550 547 int xe_gt_init_hwconfig(struct xe_gt *gt) 551 548 { 549 + unsigned int fw_ref; 552 550 int err; 553 551 554 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 555 - if (err) 556 - goto out; 552 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 553 + if (!fw_ref) 554 + return 
-ETIMEDOUT; 557 555 558 556 xe_gt_mcr_init_early(gt); 559 557 xe_pat_init(gt); ··· 572 568 xe_gt_enable_host_l2_vram(gt); 573 569 574 570 out_fw: 575 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 576 - out: 571 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 577 572 return err; 578 573 } 579 574 ··· 767 764 768 765 static int gt_reset(struct xe_gt *gt) 769 766 { 767 + unsigned int fw_ref; 770 768 int err; 771 769 772 770 if (xe_device_wedged(gt_to_xe(gt))) ··· 788 784 789 785 xe_gt_sanitize(gt); 790 786 791 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 792 - if (err) 793 - goto err_msg; 787 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 788 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 789 + err = -ETIMEDOUT; 790 + goto err_out; 791 + } 794 792 795 793 xe_uc_gucrc_disable(&gt->uc); 796 794 xe_uc_stop_prepare(&gt->uc); ··· 810 804 if (err) 811 805 goto err_out; 812 806 813 - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 814 - XE_WARN_ON(err); 807 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 815 808 xe_pm_runtime_put(gt_to_xe(gt)); 816 809 817 810 xe_gt_info(gt, "reset done\n"); ··· 818 813 return 0; 819 814 820 815 err_out: 821 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 822 - err_msg: 816 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 823 817 XE_WARN_ON(xe_uc_start(&gt->uc)); 824 818 err_fail: 825 819 xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); ··· 838 834 839 835 void xe_gt_reset_async(struct xe_gt *gt) 840 836 { 841 - xe_gt_info(gt, "trying reset\n"); 837 + xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0)); 842 838 843 839 /* Don't do a reset while one is already in flight */ 844 840 if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc)) ··· 850 846 851 847 void xe_gt_suspend_prepare(struct xe_gt *gt) 852 848 { 853 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 849 + unsigned int fw_ref; 850 + 851 + fw_ref = xe_force_wake_get(gt_to_fw(gt), 
XE_FORCEWAKE_ALL); 854 852 855 853 xe_uc_stop_prepare(&gt->uc); 856 854 857 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 855 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 858 856 } 859 857 860 858 int xe_gt_suspend(struct xe_gt *gt) 861 859 { 860 + unsigned int fw_ref; 862 861 int err; 863 862 864 863 xe_gt_dbg(gt, "suspending\n"); 865 864 xe_gt_sanitize(gt); 866 865 867 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 868 - if (err) 866 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 867 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 869 868 goto err_msg; 870 869 871 870 err = xe_uc_suspend(&gt->uc); ··· 879 872 880 873 xe_gt_disable_host_l2_vram(gt); 881 874 882 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 875 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 883 876 xe_gt_dbg(gt, "suspended\n"); 884 877 885 878 return 0; 886 879 887 - err_force_wake: 888 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 889 880 err_msg: 881 + err = -ETIMEDOUT; 882 + err_force_wake: 883 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 890 884 xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); 891 885 892 886 return err; ··· 895 887 896 888 void xe_gt_shutdown(struct xe_gt *gt) 897 889 { 898 - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 890 + unsigned int fw_ref; 891 + 892 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 899 893 do_gt_reset(gt); 900 - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 894 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 901 895 } 902 896 903 897 /** ··· 924 914 925 915 int xe_gt_resume(struct xe_gt *gt) 926 916 { 917 + unsigned int fw_ref; 927 918 int err; 928 919 929 920 xe_gt_dbg(gt, "resuming\n"); 930 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 931 - if (err) 921 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 922 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 932 923 goto err_msg; 933 924 934 925 err = 
do_gt_restart(gt); ··· 938 927 939 928 xe_gt_idle_enable_pg(gt); 940 929 941 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 930 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 942 931 xe_gt_dbg(gt, "resumed\n"); 943 932 944 933 return 0; 945 934 946 - err_force_wake: 947 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 948 935 err_msg: 936 + err = -ETIMEDOUT; 937 + err_force_wake: 938 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 949 939 xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); 950 940 951 941 return err;
+6 -7
drivers/gpu/drm/xe/xe_gt_debugfs.c
··· 90 90 struct xe_device *xe = gt_to_xe(gt); 91 91 struct xe_hw_engine *hwe; 92 92 enum xe_hw_engine_id id; 93 - int err; 93 + unsigned int fw_ref; 94 94 95 95 xe_pm_runtime_get(xe); 96 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 97 - if (err) { 96 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 97 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 98 98 xe_pm_runtime_put(xe); 99 - return err; 99 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 100 + return -ETIMEDOUT; 100 101 } 101 102 102 103 for_each_hw_engine(hwe, gt, id) 103 104 xe_hw_engine_print(hwe, p); 104 105 105 - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 106 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 106 107 xe_pm_runtime_put(xe); 107 - if (err) 108 - return err; 109 108 110 109 return 0; 111 110 }
+15 -11
drivers/gpu/drm/xe/xe_gt_idle.c
··· 101 101 struct xe_gt_idle *gtidle = &gt->gtidle; 102 102 struct xe_mmio *mmio = &gt->mmio; 103 103 u32 vcs_mask, vecs_mask; 104 + unsigned int fw_ref; 104 105 int i, j; 105 106 106 107 if (IS_SRIOV_VF(xe)) ··· 128 127 VDN_MFXVDENC_POWERGATE_ENABLE(j)); 129 128 } 130 129 131 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 130 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 132 131 if (xe->info.skip_guc_pc) { 133 132 /* 134 133 * GuC sets the hysteresis value when GuC PC is enabled ··· 139 138 } 140 139 141 140 xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable); 142 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); 141 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 143 142 } 144 143 145 144 void xe_gt_idle_disable_pg(struct xe_gt *gt) 146 145 { 147 146 struct xe_gt_idle *gtidle = &gt->gtidle; 147 + unsigned int fw_ref; 148 148 149 149 if (IS_SRIOV_VF(gt_to_xe(gt))) 150 150 return; ··· 153 151 xe_device_assert_mem_access(gt_to_xe(gt)); 154 152 gtidle->powergate_enable = 0; 155 153 156 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 154 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 157 155 xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); 158 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); 156 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 159 157 } 160 158 161 159 /** ··· 174 172 enum xe_gt_idle_state state; 175 173 u32 pg_enabled, pg_status = 0; 176 174 u32 vcs_mask, vecs_mask; 177 - int err, n; 175 + unsigned int fw_ref; 176 + int n; 178 177 /* 179 178 * Media Slices 180 179 * ··· 211 208 212 209 /* Do not wake the GT to read powergating status */ 213 210 if (state != GT_IDLE_C6) { 214 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 215 - if (err) 216 - return err; 211 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 212 + if (!fw_ref) 213 + return -ETIMEDOUT; 217 214 218 215 pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE); 219 216 pg_status = xe_mmio_read32(&gt->mmio, 
POWERGATE_DOMAIN_STATUS); 220 217 221 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); 218 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 222 219 } 223 220 224 221 if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) { ··· 301 298 { 302 299 struct kobject *kobj = arg; 303 300 struct xe_gt *gt = kobj_to_gt(kobj->parent); 301 + unsigned int fw_ref; 304 302 305 303 xe_gt_idle_disable_pg(gt); 306 304 307 305 if (gt_to_xe(gt)->info.skip_guc_pc) { 308 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 306 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 309 307 xe_gt_idle_disable_c6(gt); 310 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 308 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 311 309 } 312 310 313 311 sysfs_remove_files(kobj, gt_idle_attrs);
+35
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 2377 2377 } 2378 2378 2379 2379 /** 2380 + * xe_gt_sriov_pf_config_print_lmem - Print LMEM configurations. 2381 + * @gt: the &xe_gt 2382 + * @p: the &drm_printer 2383 + * 2384 + * Print LMEM allocations across all VFs. 2385 + * VFs without LMEM allocation are skipped. 2386 + * 2387 + * This function can only be called on PF. 2388 + * Return: 0 on success or a negative error code on failure. 2389 + */ 2390 + int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) 2391 + { 2392 + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); 2393 + const struct xe_gt_sriov_config *config; 2394 + char buf[10]; 2395 + 2396 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 2397 + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); 2398 + 2399 + for (n = 1; n <= total_vfs; n++) { 2400 + config = &gt->sriov.pf.vfs[n].config; 2401 + if (!config->lmem_obj) 2402 + continue; 2403 + 2404 + string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, 2405 + buf, sizeof(buf)); 2406 + drm_printf(p, "VF%u:\t%zu\t(%s)\n", 2407 + n, config->lmem_obj->size, buf); 2408 + } 2409 + 2410 + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); 2411 + return 0; 2412 + } 2413 + 2414 + /** 2380 2415 * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges. 2381 2416 * @gt: the &xe_gt 2382 2417 * @p: the &drm_printer
+1
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
··· 65 65 int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); 66 66 int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); 67 67 int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); 68 + int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p); 68 69 69 70 int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p); 70 71
+5
drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
··· 82 82 .data = xe_gt_sriov_pf_config_print_dbs, 83 83 }, 84 84 { 85 + "lmem_provisioned", 86 + .show = xe_gt_debugfs_simple_show, 87 + .data = xe_gt_sriov_pf_config_print_lmem, 88 + }, 89 + { 85 90 "runtime_registers", 86 91 .show = xe_gt_debugfs_simple_show, 87 92 .data = xe_gt_sriov_pf_service_print_runtime,
+3 -2
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 268 268 int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) 269 269 { 270 270 struct xe_device *xe = gt_to_xe(gt); 271 + unsigned int fw_ref; 271 272 272 273 if (xe_guc_ct_enabled(&gt->uc.guc.ct) && 273 274 gt->uc.guc.submission_state.enabled) { ··· 287 286 if (IS_SRIOV_VF(xe)) 288 287 return 0; 289 288 290 - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 289 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 291 290 if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { 292 291 xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, 293 292 PVC_GUC_TLB_INV_DESC1_INVALIDATE); ··· 297 296 xe_mmio_write32(mmio, GUC_TLB_INV_CR, 298 297 GUC_TLB_INV_CR_INVALIDATE); 299 298 } 300 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 299 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 301 300 } 302 301 303 302 return 0;
+12 -8
drivers/gpu/drm/xe/xe_guc.c
··· 70 70 71 71 static u32 guc_ctl_feature_flags(struct xe_guc *guc) 72 72 { 73 - u32 flags = 0; 73 + u32 flags = GUC_CTL_ENABLE_LITE_RESTORE; 74 74 75 75 if (!guc_to_xe(guc)->info.skip_guc_pc) 76 76 flags |= GUC_CTL_ENABLE_SLPC; ··· 248 248 { 249 249 struct xe_guc *guc = arg; 250 250 struct xe_gt *gt = guc_to_gt(guc); 251 + unsigned int fw_ref; 251 252 252 - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 253 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 253 254 xe_uc_fini_hw(&guc_to_gt(guc)->uc); 254 - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 255 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 255 256 } 256 257 257 258 /** ··· 1156 1155 void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) 1157 1156 { 1158 1157 struct xe_gt *gt = guc_to_gt(guc); 1158 + unsigned int fw_ref; 1159 1159 u32 status; 1160 - int err; 1161 1160 int i; 1162 1161 1163 1162 xe_uc_fw_print(&guc->fw, p); 1164 1163 1165 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 1166 - if (err) 1164 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 1165 + if (!fw_ref) 1167 1166 return; 1168 1167 1169 1168 status = xe_mmio_read32(&gt->mmio, GUC_STATUS); ··· 1184 1183 i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); 1185 1184 } 1186 1185 1187 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 1186 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 1188 1187 1189 - xe_guc_ct_print(&guc->ct, p); 1188 + drm_puts(p, "\n"); 1189 + xe_guc_ct_print(&guc->ct, p, false); 1190 + 1191 + drm_puts(p, "\n"); 1190 1192 xe_guc_submit_print(guc, p); 1191 1193 } 1192 1194
+5
drivers/gpu/drm/xe/xe_guc_ads.c
··· 359 359 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, 360 360 &offset, &remain); 361 361 362 + if (XE_WA(gt, 14022866841)) 363 + guc_waklv_enable_simple(ads, 364 + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, 365 + &offset, &remain); 366 + 362 367 /* 363 368 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, 364 369 * the default value for this register is determined to be 0xC40. This could change in the
+7 -1
drivers/gpu/drm/xe/xe_guc_capture.c
··· 1590 1590 u16 guc_id = 0; 1591 1591 u32 lrca = 0; 1592 1592 1593 + if (IS_SRIOV_VF(xe)) 1594 + return; 1595 + 1593 1596 new = guc_capture_get_prealloc_node(guc); 1594 1597 if (!new) 1595 1598 return; ··· 1823 1820 return NULL; 1824 1821 1825 1822 xe = gt_to_xe(q->gt); 1826 - if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe)) 1823 + if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe)) 1827 1824 return NULL; 1828 1825 1829 1826 ss = &xe->devcoredump.snapshot; ··· 1878 1875 struct xe_hw_engine *hwe; 1879 1876 enum xe_hw_engine_id id; 1880 1877 u32 adj_logical_mask = q->logical_mask; 1878 + 1879 + if (IS_SRIOV_VF(xe)) 1880 + return; 1881 1881 1882 1882 for_each_hw_engine(hwe, q->gt, id) { 1883 1883 if (hwe->class != q->hwe->class ||
+29 -27
drivers/gpu/drm/xe/xe_guc_ct.c
··· 213 213 214 214 xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); 215 215 216 - ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); 216 + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM); 217 217 if (!ct->g2h_wq) 218 218 return -ENOMEM; 219 219 ··· 1607 1607 receive_g2h(ct); 1608 1608 } 1609 1609 1610 - struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic) 1610 + static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, 1611 + bool want_ctb) 1611 1612 { 1612 1613 struct xe_guc_ct_snapshot *snapshot; 1613 1614 ··· 1616 1615 if (!snapshot) 1617 1616 return NULL; 1618 1617 1619 - if (ct->bo) { 1618 + if (ct->bo && want_ctb) { 1620 1619 snapshot->ctb_size = ct->bo->size; 1621 1620 snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); 1622 1621 } ··· 1646 1645 drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); 1647 1646 } 1648 1647 1649 - /** 1650 - * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. 1651 - * @ct: GuC CT object. 1652 - * @atomic: Boolean to indicate if this is called from atomic context like 1653 - * reset or CTB handler or from some regular path like debugfs. 1654 - * 1655 - * This can be printed out in a later stage like during dev_coredump 1656 - * analysis. 1657 - * 1658 - * Returns: a GuC CT snapshot object that must be freed by the caller 1659 - * by using `xe_guc_ct_snapshot_free`. 
1660 - */ 1661 - struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, 1662 - bool atomic) 1648 + static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic, 1649 + bool want_ctb) 1663 1650 { 1664 1651 struct xe_device *xe = ct_to_xe(ct); 1665 1652 struct xe_guc_ct_snapshot *snapshot; 1666 1653 1667 - snapshot = xe_guc_ct_snapshot_alloc(ct, atomic); 1654 + snapshot = guc_ct_snapshot_alloc(ct, atomic, want_ctb); 1668 1655 if (!snapshot) { 1669 1656 xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n"); 1670 1657 return NULL; ··· 1669 1680 xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size); 1670 1681 1671 1682 return snapshot; 1683 + } 1684 + 1685 + /** 1686 + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. 1687 + * @ct: GuC CT object. 1688 + * 1689 + * This can be printed out in a later stage like during dev_coredump 1690 + * analysis. This is safe to be called during atomic context. 1691 + * 1692 + * Returns: a GuC CT snapshot object that must be freed by the caller 1693 + * by using `xe_guc_ct_snapshot_free`. 1694 + */ 1695 + struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct) 1696 + { 1697 + return guc_ct_snapshot_capture(ct, true, true); 1672 1698 } 1673 1699 1674 1700 /** ··· 1708 1704 drm_printf(p, "\tg2h outstanding: %d\n", 1709 1705 snapshot->g2h_outstanding); 1710 1706 1711 - if (snapshot->ctb) { 1707 + if (snapshot->ctb) 1712 1708 xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); 1713 - } else { 1714 - drm_printf(p, "CTB snapshot missing!\n"); 1715 - return; 1716 - } 1717 1709 } else { 1718 1710 drm_puts(p, "CT disabled\n"); 1719 1711 } ··· 1735 1735 * xe_guc_ct_print - GuC CT Print. 1736 1736 * @ct: GuC CT. 1737 1737 * @p: drm_printer where it will be printed out. 
1738 + * @want_ctb: Should the full CTB content be dumped (vs just the headers) 1738 1739 * 1739 - * This function quickly capture a snapshot and immediately print it out. 1740 + * This function will quickly capture a snapshot of the CT state 1741 + * and immediately print it out. 1740 1742 */ 1741 - void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) 1743 + void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) 1742 1744 { 1743 1745 struct xe_guc_ct_snapshot *snapshot; 1744 1746 1745 - snapshot = xe_guc_ct_snapshot_capture(ct, false); 1747 + snapshot = guc_ct_snapshot_capture(ct, false, want_ctb); 1746 1748 xe_guc_ct_snapshot_print(snapshot, p); 1747 1749 xe_guc_ct_snapshot_free(snapshot); 1748 1750 } ··· 1778 1776 return; 1779 1777 1780 1778 snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true); 1781 - snapshot_ct = xe_guc_ct_snapshot_capture((ct), true); 1779 + snapshot_ct = xe_guc_ct_snapshot_capture((ct)); 1782 1780 1783 1781 spin_lock_irqsave(&ct->dead.lock, flags); 1784 1782
+2 -3
drivers/gpu/drm/xe/xe_guc_ct.h
··· 17 17 void xe_guc_ct_stop(struct xe_guc_ct *ct); 18 18 void xe_guc_ct_fast_path(struct xe_guc_ct *ct); 19 19 20 - struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic); 21 - struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic); 20 + struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); 22 21 void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p); 23 22 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); 24 - void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); 23 + void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); 25 24 26 25 static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) 27 26 {
+14
drivers/gpu/drm/xe/xe_guc_debugfs.c
··· 47 47 return 0; 48 48 } 49 49 50 + static int guc_ctb(struct seq_file *m, void *data) 51 + { 52 + struct xe_guc *guc = node_to_guc(m->private); 53 + struct xe_device *xe = guc_to_xe(guc); 54 + struct drm_printer p = drm_seq_file_printer(m); 55 + 56 + xe_pm_runtime_get(xe); 57 + xe_guc_ct_print(&guc->ct, &p, true); 58 + xe_pm_runtime_put(xe); 59 + 60 + return 0; 61 + } 62 + 50 63 static const struct drm_info_list debugfs_list[] = { 51 64 {"guc_info", guc_info, 0}, 52 65 {"guc_log", guc_log, 0}, 66 + {"guc_ctb", guc_ctb, 0}, 53 67 }; 54 68 55 69 void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+1
drivers/gpu/drm/xe/xe_guc_fwif.h
··· 105 105 106 106 #define GUC_CTL_FEATURE 2 107 107 #define GUC_CTL_ENABLE_SLPC BIT(2) 108 + #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) 108 109 #define GUC_CTL_DISABLE_SCHEDULER BIT(14) 109 110 110 111 #define GUC_CTL_DEBUG 3
+8 -7
drivers/gpu/drm/xe/xe_guc_log.c
··· 145 145 struct xe_device *xe = log_to_xe(log); 146 146 struct xe_guc *guc = log_to_guc(log); 147 147 struct xe_gt *gt = log_to_gt(log); 148 + unsigned int fw_ref; 148 149 size_t remain; 149 - int i, err; 150 + int i; 150 151 151 152 if (!log->bo) { 152 153 xe_gt_err(gt, "GuC log buffer not allocated\n"); ··· 169 168 remain -= size; 170 169 } 171 170 172 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 173 - if (err) { 174 - snapshot->stamp = ~0; 171 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 172 + if (!fw_ref) { 173 + snapshot->stamp = ~0ULL; 175 174 } else { 176 - snapshot->stamp = xe_mmio_read32(&gt->mmio, GUC_PMTIMESTAMP); 177 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 175 + snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO); 176 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 178 177 } 179 178 snapshot->ktime = ktime_get_boottime_ns(); 180 179 snapshot->level = log->level; ··· 205 204 snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch, 206 205 snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch); 207 206 drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime); 208 - drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", snapshot->stamp, snapshot->stamp); 207 + drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); 209 208 drm_printf(p, "Log level: %u\n", snapshot->level); 210 209 211 210 remain = snapshot->size;
+1 -1
drivers/gpu/drm/xe/xe_guc_log_types.h
··· 27 27 /** @ktime: Kernel time the snapshot was taken */ 28 28 u64 ktime; 29 29 /** @stamp: GuC timestamp at which the snapshot was taken */ 30 - u32 stamp; 30 + u64 stamp; 31 31 /** @level: GuC log verbosity level */ 32 32 u32 level; 33 33 /** @ver_found: GuC firmware version */
+31 -19
drivers/gpu/drm/xe/xe_guc_pc.c
··· 415 415 int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) 416 416 { 417 417 struct xe_gt *gt = pc_to_gt(pc); 418 - int ret; 418 + unsigned int fw_ref; 419 419 420 420 /* 421 421 * GuC SLPC plays with cur freq request when GuCRC is enabled 422 422 * Block RC6 for a more reliable read. 423 423 */ 424 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 425 - if (ret) 426 - return ret; 424 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 425 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 426 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 427 + return -ETIMEDOUT; 428 + } 427 429 428 430 *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ); 429 431 430 432 *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); 431 433 *freq = decode_freq(*freq); 432 434 433 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 435 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 434 436 return 0; 435 437 } 436 438 ··· 482 480 int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) 483 481 { 484 482 struct xe_gt *gt = pc_to_gt(pc); 483 + unsigned int fw_ref; 485 484 int ret; 486 485 487 486 mutex_lock(&pc->freq_lock); ··· 496 493 * GuC SLPC plays with min freq request when GuCRC is enabled 497 494 * Block RC6 for a more reliable read. 
498 495 */ 499 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 500 - if (ret) 501 - goto out; 496 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 497 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 498 + ret = -ETIMEDOUT; 499 + goto fw; 500 + } 502 501 503 502 ret = pc_action_query_task_state(pc); 504 503 if (ret) ··· 509 504 *freq = pc_get_min_freq(pc); 510 505 511 506 fw: 512 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 507 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 513 508 out: 514 509 mutex_unlock(&pc->freq_lock); 515 510 return ret; ··· 860 855 { 861 856 struct xe_device *xe = pc_to_xe(pc); 862 857 struct xe_gt *gt = pc_to_gt(pc); 858 + unsigned int fw_ref; 863 859 int ret = 0; 864 860 865 861 if (xe->info.skip_guc_pc) ··· 870 864 if (ret) 871 865 return ret; 872 866 873 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 874 - if (ret) 875 - return ret; 867 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 868 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 869 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 870 + return -ETIMEDOUT; 871 + } 876 872 877 873 xe_gt_idle_disable_c6(gt); 878 874 879 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 875 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 880 876 881 877 return 0; 882 878 } ··· 964 956 struct xe_device *xe = pc_to_xe(pc); 965 957 struct xe_gt *gt = pc_to_gt(pc); 966 958 u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); 959 + unsigned int fw_ref; 967 960 int ret; 968 961 969 962 xe_gt_assert(gt, xe_device_uc_enabled(xe)); 970 963 971 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 972 - if (ret) 973 - return ret; 964 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 965 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 966 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 967 + return -ETIMEDOUT; 968 + } 974 969 975 970 if (xe->info.skip_guc_pc) { 976 971 if 
(xe->info.platform != XE_PVC) ··· 1016 1005 ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); 1017 1006 1018 1007 out: 1019 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 1008 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 1020 1009 return ret; 1021 1010 } 1022 1011 ··· 1048 1037 { 1049 1038 struct xe_guc_pc *pc = arg; 1050 1039 struct xe_device *xe = pc_to_xe(pc); 1040 + unsigned int fw_ref; 1051 1041 1052 1042 if (xe_device_wedged(xe)) 1053 1043 return; 1054 1044 1055 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); 1045 + fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); 1056 1046 xe_guc_pc_gucrc_disable(pc); 1057 1047 XE_WARN_ON(xe_guc_pc_stop(pc)); 1058 1048 1059 1049 /* Bind requested freq to mert_freq_cap before unload */ 1060 1050 pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); 1061 1051 1062 - xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); 1052 + xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); 1063 1053 } 1064 1054 1065 1055 /**
+9 -6
drivers/gpu/drm/xe/xe_guc_submit.c
··· 717 717 struct xe_exec_queue *q = job->q; 718 718 struct xe_guc *guc = exec_queue_to_guc(q); 719 719 struct xe_device *xe = guc_to_xe(guc); 720 + struct dma_fence *fence = NULL; 720 721 bool lr = xe_exec_queue_is_lr(q); 721 722 722 723 xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || ··· 735 734 736 735 if (lr) { 737 736 xe_sched_job_set_error(job, -EOPNOTSUPP); 738 - return NULL; 739 - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { 740 - return job->fence; 737 + dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */ 741 738 } else { 742 - return dma_fence_get(job->fence); 739 + fence = job->fence; 743 740 } 741 + 742 + return fence; 744 743 } 745 744 746 745 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) ··· 1036 1035 struct xe_guc *guc = exec_queue_to_guc(q); 1037 1036 const char *process_name = "no process"; 1038 1037 struct xe_device *xe = guc_to_xe(guc); 1038 + unsigned int fw_ref; 1039 1039 int err = -ETIME; 1040 1040 pid_t pid = -1; 1041 1041 int i = 0; ··· 1070 1068 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1071 1069 !xe_guc_capture_get_matching_and_lock(job)) { 1072 1070 /* take force wake before engine register manual capture */ 1073 - if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) 1071 + fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1072 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 1074 1073 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1075 1074 1076 1075 xe_engine_snapshot_capture_for_job(job); 1077 1076 1078 - xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1077 + xe_force_wake_put(gt_to_fw(q->gt), fw_ref); 1079 1078 } 1080 1079 1081 1080 /*
+4 -4
drivers/gpu/drm/xe/xe_huc.c
··· 296 296 void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) 297 297 { 298 298 struct xe_gt *gt = huc_to_gt(huc); 299 - int err; 299 + unsigned int fw_ref; 300 300 301 301 xe_uc_fw_print(&huc->fw, p); 302 302 303 303 if (!xe_uc_fw_is_enabled(&huc->fw)) 304 304 return; 305 305 306 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 307 - if (err) 306 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 307 + if (!fw_ref) 308 308 return; 309 309 310 310 drm_printf(p, "\nHuC status: 0x%08x\n", 311 311 xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO)); 312 312 313 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 313 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 314 314 }
+5 -9
drivers/gpu/drm/xe/xe_mocs.c
··· 774 774 775 775 void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) 776 776 { 777 - struct xe_mocs_info table; 778 - unsigned int flags; 779 - u32 ret; 780 777 struct xe_device *xe = gt_to_xe(gt); 778 + struct xe_mocs_info table; 779 + unsigned int fw_ref, flags; 781 780 782 781 flags = get_mocs_settings(xe, &table); 783 782 784 783 xe_pm_runtime_get_noresume(xe); 785 - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 786 - 787 - if (ret) 784 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 785 + if (!fw_ref) 788 786 goto err_fw; 789 787 790 788 table.ops->dump(&table, flags, gt, p); 791 789 792 - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 793 - 790 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 794 791 err_fw: 795 - xe_assert(xe, !ret); 796 792 xe_pm_runtime_put(xe); 797 793 } 798 794
+467 -211
drivers/gpu/drm/xe/xe_oa.c
··· 36 36 #include "xe_pm.h" 37 37 #include "xe_sched_job.h" 38 38 #include "xe_sriov.h" 39 + #include "xe_sync.h" 39 40 40 41 #define DEFAULT_POLL_FREQUENCY_HZ 200 41 42 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) 42 43 #define XE_OA_UNIT_INVALID U32_MAX 44 + 45 + enum xe_oa_submit_deps { 46 + XE_OA_SUBMIT_NO_DEPS, 47 + XE_OA_SUBMIT_ADD_DEPS, 48 + }; 49 + 50 + enum xe_oa_user_extn_from { 51 + XE_OA_USER_EXTN_FROM_OPEN, 52 + XE_OA_USER_EXTN_FROM_CONFIG, 53 + }; 43 54 44 55 struct xe_oa_reg { 45 56 struct xe_reg addr; ··· 81 70 }; 82 71 83 72 struct xe_oa_open_param { 73 + struct xe_file *xef; 84 74 u32 oa_unit_id; 85 75 bool sample; 86 76 u32 metric_set; ··· 93 81 struct xe_exec_queue *exec_q; 94 82 struct xe_hw_engine *hwe; 95 83 bool no_preempt; 84 + struct drm_xe_sync __user *syncs_user; 85 + int num_syncs; 86 + struct xe_sync_entry *syncs; 96 87 }; 97 88 98 89 struct xe_oa_config_bo { ··· 103 88 104 89 struct xe_oa_config *oa_config; 105 90 struct xe_bb *bb; 91 + }; 92 + 93 + struct xe_oa_fence { 94 + /* @base: dma fence base */ 95 + struct dma_fence base; 96 + /* @lock: lock for the fence */ 97 + spinlock_t lock; 98 + /* @work: work to signal @base */ 99 + struct delayed_work work; 100 + /* @cb: callback to schedule @work */ 101 + struct dma_fence_cb cb; 106 102 }; 107 103 108 104 #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x ··· 188 162 return oa_config; 189 163 } 190 164 191 - static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) 165 + static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence) 192 166 { 193 167 xe_oa_config_put(oa_bo->oa_config); 194 - xe_bb_free(oa_bo->bb, NULL); 168 + xe_bb_free(oa_bo->bb, last_fence); 195 169 kfree(oa_bo); 196 170 } 197 171 ··· 596 570 return ret; 597 571 } 598 572 599 - static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) 573 + static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, 574 + 
struct xe_bb *bb) 600 575 { 601 576 struct xe_sched_job *job; 602 577 struct dma_fence *fence; 603 - long timeout; 604 578 int err = 0; 605 579 606 580 /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ ··· 610 584 goto exit; 611 585 } 612 586 587 + if (deps == XE_OA_SUBMIT_ADD_DEPS) { 588 + for (int i = 0; i < stream->num_syncs && !err; i++) 589 + err = xe_sync_entry_add_deps(&stream->syncs[i], job); 590 + if (err) { 591 + drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); 592 + goto err_put_job; 593 + } 594 + } 595 + 613 596 xe_sched_job_arm(job); 614 597 fence = dma_fence_get(&job->drm.s_fence->finished); 615 598 xe_sched_job_push(job); 616 599 617 - timeout = dma_fence_wait_timeout(fence, false, HZ); 618 - dma_fence_put(fence); 619 - if (timeout < 0) 620 - err = timeout; 621 - else if (!timeout) 622 - err = -ETIME; 600 + return fence; 601 + err_put_job: 602 + xe_sched_job_put(job); 623 603 exit: 624 - return err; 604 + return ERR_PTR(err); 625 605 } 626 606 627 607 static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) ··· 671 639 672 640 xe_oa_config_put(stream->oa_config); 673 641 llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) 674 - free_oa_config_bo(oa_bo); 642 + free_oa_config_bo(oa_bo, stream->last_fence); 643 + dma_fence_put(stream->last_fence); 675 644 } 676 645 677 646 static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, ··· 692 659 static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, 693 660 const struct flex *flex, u32 count) 694 661 { 662 + struct dma_fence *fence; 695 663 struct xe_bb *bb; 696 664 int err; 697 665 ··· 704 670 705 671 xe_oa_store_flex(stream, lrc, bb, flex, count); 706 672 707 - err = xe_oa_submit_bb(stream, bb); 673 + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); 674 + if (IS_ERR(fence)) { 675 + err = PTR_ERR(fence); 676 + goto free_bb; 677 + } 678 + 
xe_bb_free(bb, fence); 679 + dma_fence_put(fence); 680 + 681 + return 0; 682 + free_bb: 708 683 xe_bb_free(bb, NULL); 709 684 exit: 710 685 return err; ··· 721 678 722 679 static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) 723 680 { 681 + struct dma_fence *fence; 724 682 struct xe_bb *bb; 725 683 int err; 726 684 ··· 733 689 734 690 write_cs_mi_lri(bb, reg_lri, 1); 735 691 736 - err = xe_oa_submit_bb(stream, bb); 692 + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); 693 + if (IS_ERR(fence)) { 694 + err = PTR_ERR(fence); 695 + goto free_bb; 696 + } 697 + xe_bb_free(bb, fence); 698 + dma_fence_put(fence); 699 + 700 + return 0; 701 + free_bb: 737 702 xe_bb_free(bb, NULL); 738 703 exit: 739 704 return err; ··· 890 837 891 838 xe_oa_free_oa_buffer(stream); 892 839 893 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 840 + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 894 841 xe_pm_runtime_put(stream->oa->xe); 895 842 896 843 /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ ··· 898 845 xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); 899 846 900 847 xe_oa_free_configs(stream); 848 + xe_file_put(stream->xef); 901 849 } 902 850 903 851 static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) ··· 969 915 return oa_bo; 970 916 } 971 917 918 + static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence) 919 + { 920 + dma_fence_put(stream->last_fence); 921 + stream->last_fence = dma_fence_get(fence); 922 + } 923 + 924 + static void xe_oa_fence_work_fn(struct work_struct *w) 925 + { 926 + struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work); 927 + 928 + /* Signal fence to indicate new OA configuration is active */ 929 + dma_fence_signal(&ofence->base); 930 + dma_fence_put(&ofence->base); 931 + } 932 + 933 + static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 934 + { 935 + /* Additional empirical 
delay needed for NOA programming after registers are written */ 936 + #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 937 + 938 + struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); 939 + 940 + INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); 941 + queue_delayed_work(system_unbound_wq, &ofence->work, 942 + usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); 943 + dma_fence_put(fence); 944 + } 945 + 946 + static const char *xe_oa_get_driver_name(struct dma_fence *fence) 947 + { 948 + return "xe_oa"; 949 + } 950 + 951 + static const char *xe_oa_get_timeline_name(struct dma_fence *fence) 952 + { 953 + return "unbound"; 954 + } 955 + 956 + static const struct dma_fence_ops xe_oa_fence_ops = { 957 + .get_driver_name = xe_oa_get_driver_name, 958 + .get_timeline_name = xe_oa_get_timeline_name, 959 + }; 960 + 972 961 static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) 973 962 { 974 963 #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 975 964 struct xe_oa_config_bo *oa_bo; 976 - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; 965 + struct xe_oa_fence *ofence; 966 + int i, err, num_signal = 0; 967 + struct dma_fence *fence; 968 + 969 + ofence = kzalloc(sizeof(*ofence), GFP_KERNEL); 970 + if (!ofence) { 971 + err = -ENOMEM; 972 + goto exit; 973 + } 977 974 978 975 oa_bo = xe_oa_alloc_config_buffer(stream, config); 979 976 if (IS_ERR(oa_bo)) { ··· 1032 927 goto exit; 1033 928 } 1034 929 1035 - err = xe_oa_submit_bb(stream, oa_bo->bb); 930 + /* Emit OA configuration batch */ 931 + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); 932 + if (IS_ERR(fence)) { 933 + err = PTR_ERR(fence); 934 + goto exit; 935 + } 1036 936 1037 - /* Additional empirical delay needed for NOA programming after registers are written */ 1038 - usleep_range(us, 2 * us); 937 + /* Point of no return: initialize and set fence to signal */ 938 + spin_lock_init(&ofence->lock); 939 + dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 
0, 0); 940 + 941 + for (i = 0; i < stream->num_syncs; i++) { 942 + if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) 943 + num_signal++; 944 + xe_sync_entry_signal(&stream->syncs[i], &ofence->base); 945 + } 946 + 947 + /* Additional dma_fence_get in case we dma_fence_wait */ 948 + if (!num_signal) 949 + dma_fence_get(&ofence->base); 950 + 951 + /* Update last fence too before adding callback */ 952 + xe_oa_update_last_fence(stream, fence); 953 + 954 + /* Add job fence callback to schedule work to signal ofence->base */ 955 + err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb); 956 + xe_gt_assert(stream->gt, !err || err == -ENOENT); 957 + if (err == -ENOENT) 958 + xe_oa_config_cb(fence, &ofence->cb); 959 + 960 + /* If nothing needs to be signaled we wait synchronously */ 961 + if (!num_signal) { 962 + dma_fence_wait(&ofence->base, false); 963 + dma_fence_put(&ofence->base); 964 + } 965 + 966 + /* Done with syncs */ 967 + for (i = 0; i < stream->num_syncs; i++) 968 + xe_sync_entry_cleanup(&stream->syncs[i]); 969 + kfree(stream->syncs); 970 + 971 + return 0; 1039 972 exit: 973 + kfree(ofence); 1040 974 return err; 1041 975 } 1042 976 ··· 1145 1001 } 1146 1002 1147 1003 return xe_oa_emit_oa_config(stream, stream->oa_config); 1004 + } 1005 + 1006 + static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) 1007 + { 1008 + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); 1009 + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); 1010 + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); 1011 + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); 1012 + int idx; 1013 + 1014 + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { 1015 + const struct xe_oa_format *f = &oa->oa_formats[idx]; 1016 + 1017 + if (counter_size == f->counter_size && bc_report == f->bc_report && 1018 + type == f->type && counter_sel == f->counter_select) { 1019 + *name = idx; 1020 + return 0; 
1021 + } 1022 + } 1023 + 1024 + return -EINVAL; 1025 + } 1026 + 1027 + static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, 1028 + struct xe_oa_open_param *param) 1029 + { 1030 + if (value >= oa->oa_unit_ids) { 1031 + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); 1032 + return -EINVAL; 1033 + } 1034 + param->oa_unit_id = value; 1035 + return 0; 1036 + } 1037 + 1038 + static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, 1039 + struct xe_oa_open_param *param) 1040 + { 1041 + param->sample = value; 1042 + return 0; 1043 + } 1044 + 1045 + static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, 1046 + struct xe_oa_open_param *param) 1047 + { 1048 + param->metric_set = value; 1049 + return 0; 1050 + } 1051 + 1052 + static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, 1053 + struct xe_oa_open_param *param) 1054 + { 1055 + int ret = decode_oa_format(oa, value, &param->oa_format); 1056 + 1057 + if (ret) { 1058 + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); 1059 + return ret; 1060 + } 1061 + return 0; 1062 + } 1063 + 1064 + static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, 1065 + struct xe_oa_open_param *param) 1066 + { 1067 + #define OA_EXPONENT_MAX 31 1068 + 1069 + if (value > OA_EXPONENT_MAX) { 1070 + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); 1071 + return -EINVAL; 1072 + } 1073 + param->period_exponent = value; 1074 + return 0; 1075 + } 1076 + 1077 + static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, 1078 + struct xe_oa_open_param *param) 1079 + { 1080 + param->disabled = value; 1081 + return 0; 1082 + } 1083 + 1084 + static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, 1085 + struct xe_oa_open_param *param) 1086 + { 1087 + param->exec_queue_id = value; 1088 + return 0; 1089 + } 1090 + 1091 + static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, 1092 + struct xe_oa_open_param *param) 
1093 + { 1094 + param->engine_instance = value; 1095 + return 0; 1096 + } 1097 + 1098 + static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, 1099 + struct xe_oa_open_param *param) 1100 + { 1101 + param->no_preempt = value; 1102 + return 0; 1103 + } 1104 + 1105 + static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, 1106 + struct xe_oa_open_param *param) 1107 + { 1108 + param->num_syncs = value; 1109 + return 0; 1110 + } 1111 + 1112 + static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, 1113 + struct xe_oa_open_param *param) 1114 + { 1115 + param->syncs_user = u64_to_user_ptr(value); 1116 + return 0; 1117 + } 1118 + 1119 + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, 1120 + struct xe_oa_open_param *param) 1121 + { 1122 + return -EINVAL; 1123 + } 1124 + 1125 + typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, 1126 + struct xe_oa_open_param *param); 1127 + static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { 1128 + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, 1129 + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, 1130 + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, 1131 + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, 1132 + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, 1133 + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, 1134 + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, 1135 + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, 1136 + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, 1137 + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1138 + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1139 + }; 1140 + 1141 + static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { 1142 + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval, 1143 + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval, 
1144 + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, 1145 + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval, 1146 + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval, 1147 + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval, 1148 + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval, 1149 + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval, 1150 + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, 1151 + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1152 + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1153 + }; 1154 + 1155 + static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, 1156 + u64 extension, struct xe_oa_open_param *param) 1157 + { 1158 + u64 __user *address = u64_to_user_ptr(extension); 1159 + struct drm_xe_ext_set_property ext; 1160 + int err; 1161 + u32 idx; 1162 + 1163 + err = __copy_from_user(&ext, address, sizeof(ext)); 1164 + if (XE_IOCTL_DBG(oa->xe, err)) 1165 + return -EFAULT; 1166 + 1167 + BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) != 1168 + ARRAY_SIZE(xe_oa_set_property_funcs_config)); 1169 + 1170 + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || 1171 + XE_IOCTL_DBG(oa->xe, ext.pad)) 1172 + return -EINVAL; 1173 + 1174 + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); 1175 + 1176 + if (from == XE_OA_USER_EXTN_FROM_CONFIG) 1177 + return xe_oa_set_property_funcs_config[idx](oa, ext.value, param); 1178 + else 1179 + return xe_oa_set_property_funcs_open[idx](oa, ext.value, param); 1180 + } 1181 + 1182 + typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from, 1183 + u64 extension, struct xe_oa_open_param *param); 1184 + static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { 1185 + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, 1186 + }; 1187 + 1188 + 
#define MAX_USER_EXTENSIONS 16 1189 + static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension, 1190 + int ext_number, struct xe_oa_open_param *param) 1191 + { 1192 + u64 __user *address = u64_to_user_ptr(extension); 1193 + struct drm_xe_user_extension ext; 1194 + int err; 1195 + u32 idx; 1196 + 1197 + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) 1198 + return -E2BIG; 1199 + 1200 + err = __copy_from_user(&ext, address, sizeof(ext)); 1201 + if (XE_IOCTL_DBG(oa->xe, err)) 1202 + return -EFAULT; 1203 + 1204 + if (XE_IOCTL_DBG(oa->xe, ext.pad) || 1205 + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) 1206 + return -EINVAL; 1207 + 1208 + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); 1209 + err = xe_oa_user_extension_funcs[idx](oa, from, extension, param); 1210 + if (XE_IOCTL_DBG(oa->xe, err)) 1211 + return err; 1212 + 1213 + if (ext.next_extension) 1214 + return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param); 1215 + 1216 + return 0; 1217 + } 1218 + 1219 + static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) 1220 + { 1221 + int ret, num_syncs, num_ufence = 0; 1222 + 1223 + if (param->num_syncs && !param->syncs_user) { 1224 + drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); 1225 + ret = -EINVAL; 1226 + goto exit; 1227 + } 1228 + 1229 + if (param->num_syncs) { 1230 + param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); 1231 + if (!param->syncs) { 1232 + ret = -ENOMEM; 1233 + goto exit; 1234 + } 1235 + } 1236 + 1237 + for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { 1238 + ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], 1239 + &param->syncs_user[num_syncs], 0); 1240 + if (ret) 1241 + goto err_syncs; 1242 + 1243 + if (xe_sync_is_ufence(&param->syncs[num_syncs])) 1244 + num_ufence++; 1245 + } 1246 + 1247 + if (XE_IOCTL_DBG(oa->xe, 
num_ufence > 1)) { 1248 + ret = -EINVAL; 1249 + goto err_syncs; 1250 + } 1251 + 1252 + return 0; 1253 + 1254 + err_syncs: 1255 + while (num_syncs--) 1256 + xe_sync_entry_cleanup(&param->syncs[num_syncs]); 1257 + kfree(param->syncs); 1258 + exit: 1259 + return ret; 1148 1260 } 1149 1261 1150 1262 static void xe_oa_stream_enable(struct xe_oa_stream *stream) ··· 1496 1096 1497 1097 static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) 1498 1098 { 1499 - struct drm_xe_ext_set_property ext; 1099 + struct xe_oa_open_param param = {}; 1500 1100 long ret = stream->oa_config->id; 1501 1101 struct xe_oa_config *config; 1502 1102 int err; 1503 1103 1504 - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); 1505 - if (XE_IOCTL_DBG(stream->oa->xe, err)) 1506 - return -EFAULT; 1104 + err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param); 1105 + if (err) 1106 + return err; 1507 1107 1508 - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || 1509 - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || 1510 - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || 1511 - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) 1512 - return -EINVAL; 1513 - 1514 - config = xe_oa_get_oa_config(stream->oa, ext.value); 1108 + config = xe_oa_get_oa_config(stream->oa, param.metric_set); 1515 1109 if (!config) 1516 1110 return -ENODEV; 1517 1111 1518 - if (config != stream->oa_config) { 1519 - err = xe_oa_emit_oa_config(stream, config); 1520 - if (!err) 1521 - config = xchg(&stream->oa_config, config); 1522 - else 1523 - ret = err; 1112 + param.xef = stream->xef; 1113 + err = xe_oa_parse_syncs(stream->oa, &param); 1114 + if (err) 1115 + goto err_config_put; 1116 + 1117 + stream->num_syncs = param.num_syncs; 1118 + stream->syncs = param.syncs; 1119 + 1120 + err = xe_oa_emit_oa_config(stream, config); 1121 + if (!err) { 1122 + config = xchg(&stream->oa_config, config); 1123 + 
drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n", 1124 + stream->oa_config->uuid); 1524 1125 } 1525 1126 1127 + err_config_put: 1526 1128 xe_oa_config_put(config); 1527 1129 1528 - return ret; 1130 + return err ?: ret; 1529 1131 } 1530 1132 1531 1133 static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) ··· 1755 1353 { 1756 1354 struct xe_oa_unit *u = param->hwe->oa_unit; 1757 1355 struct xe_gt *gt = param->hwe->gt; 1356 + unsigned int fw_ref; 1758 1357 int ret; 1759 1358 1760 1359 stream->exec_q = param->exec_q; ··· 1768 1365 stream->periodic = param->period_exponent > 0; 1769 1366 stream->period_exponent = param->period_exponent; 1770 1367 stream->no_preempt = param->no_preempt; 1368 + 1369 + stream->xef = xe_file_get(param->xef); 1370 + stream->num_syncs = param->num_syncs; 1371 + stream->syncs = param->syncs; 1771 1372 1772 1373 /* 1773 1374 * For Xe2+, when overrun mode is enabled, there are no partial reports at the end ··· 1820 1413 1821 1414 /* Take runtime pm ref and forcewake to disable RC6 */ 1822 1415 xe_pm_runtime_get(stream->oa->xe); 1823 - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 1416 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 1417 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 1418 + ret = -ETIMEDOUT; 1419 + goto err_fw_put; 1420 + } 1824 1421 1825 1422 ret = xe_oa_alloc_oa_buffer(stream); 1826 1423 if (ret) ··· 1866 1455 err_free_oa_buf: 1867 1456 xe_oa_free_oa_buffer(stream); 1868 1457 err_fw_put: 1869 - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 1458 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 1870 1459 xe_pm_runtime_put(stream->oa->xe); 1871 1460 if (stream->override_gucrc) 1872 1461 xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); 1873 1462 err_free_configs: 1874 1463 xe_oa_free_configs(stream); 1875 1464 exit: 1465 + xe_file_put(stream->xef); 1876 1466 return ret; 1877 1467 } 1878 1468 ··· 1983 1571 } 1984 1572 } 
1985 1573 1986 - static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) 1987 - { 1988 - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); 1989 - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); 1990 - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); 1991 - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); 1992 - int idx; 1993 - 1994 - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { 1995 - const struct xe_oa_format *f = &oa->oa_formats[idx]; 1996 - 1997 - if (counter_size == f->counter_size && bc_report == f->bc_report && 1998 - type == f->type && counter_sel == f->counter_select) { 1999 - *name = idx; 2000 - return 0; 2001 - } 2002 - } 2003 - 2004 - return -EINVAL; 2005 - } 2006 - 2007 1574 /** 2008 1575 * xe_oa_unit_id - Return OA unit ID for a hardware engine 2009 1576 * @hwe: @xe_hw_engine ··· 2029 1638 return ret; 2030 1639 } 2031 1640 2032 - static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, 2033 - struct xe_oa_open_param *param) 2034 - { 2035 - if (value >= oa->oa_unit_ids) { 2036 - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); 2037 - return -EINVAL; 2038 - } 2039 - param->oa_unit_id = value; 2040 - return 0; 2041 - } 2042 - 2043 - static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, 2044 - struct xe_oa_open_param *param) 2045 - { 2046 - param->sample = value; 2047 - return 0; 2048 - } 2049 - 2050 - static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, 2051 - struct xe_oa_open_param *param) 2052 - { 2053 - param->metric_set = value; 2054 - return 0; 2055 - } 2056 - 2057 - static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, 2058 - struct xe_oa_open_param *param) 2059 - { 2060 - int ret = decode_oa_format(oa, value, &param->oa_format); 2061 - 2062 - if (ret) { 2063 - drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); 2064 - return ret; 2065 - } 2066 - return 0; 2067 
- } 2068 - 2069 - static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, 2070 - struct xe_oa_open_param *param) 2071 - { 2072 - #define OA_EXPONENT_MAX 31 2073 - 2074 - if (value > OA_EXPONENT_MAX) { 2075 - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); 2076 - return -EINVAL; 2077 - } 2078 - param->period_exponent = value; 2079 - return 0; 2080 - } 2081 - 2082 - static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, 2083 - struct xe_oa_open_param *param) 2084 - { 2085 - param->disabled = value; 2086 - return 0; 2087 - } 2088 - 2089 - static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, 2090 - struct xe_oa_open_param *param) 2091 - { 2092 - param->exec_queue_id = value; 2093 - return 0; 2094 - } 2095 - 2096 - static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, 2097 - struct xe_oa_open_param *param) 2098 - { 2099 - param->engine_instance = value; 2100 - return 0; 2101 - } 2102 - 2103 - static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, 2104 - struct xe_oa_open_param *param) 2105 - { 2106 - param->no_preempt = value; 2107 - return 0; 2108 - } 2109 - 2110 - typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, 2111 - struct xe_oa_open_param *param); 2112 - static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { 2113 - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, 2114 - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, 2115 - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, 2116 - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, 2117 - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, 2118 - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, 2119 - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, 2120 - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, 2121 - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, 2122 - }; 2123 - 
2124 - static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, 2125 - struct xe_oa_open_param *param) 2126 - { 2127 - u64 __user *address = u64_to_user_ptr(extension); 2128 - struct drm_xe_ext_set_property ext; 2129 - int err; 2130 - u32 idx; 2131 - 2132 - err = __copy_from_user(&ext, address, sizeof(ext)); 2133 - if (XE_IOCTL_DBG(oa->xe, err)) 2134 - return -EFAULT; 2135 - 2136 - if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || 2137 - XE_IOCTL_DBG(oa->xe, ext.pad)) 2138 - return -EINVAL; 2139 - 2140 - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); 2141 - return xe_oa_set_property_funcs[idx](oa, ext.value, param); 2142 - } 2143 - 2144 - typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, 2145 - struct xe_oa_open_param *param); 2146 - static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { 2147 - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, 2148 - }; 2149 - 2150 - #define MAX_USER_EXTENSIONS 16 2151 - static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, 2152 - struct xe_oa_open_param *param) 2153 - { 2154 - u64 __user *address = u64_to_user_ptr(extension); 2155 - struct drm_xe_user_extension ext; 2156 - int err; 2157 - u32 idx; 2158 - 2159 - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) 2160 - return -E2BIG; 2161 - 2162 - err = __copy_from_user(&ext, address, sizeof(ext)); 2163 - if (XE_IOCTL_DBG(oa->xe, err)) 2164 - return -EFAULT; 2165 - 2166 - if (XE_IOCTL_DBG(oa->xe, ext.pad) || 2167 - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) 2168 - return -EINVAL; 2169 - 2170 - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); 2171 - err = xe_oa_user_extension_funcs[idx](oa, extension, param); 2172 - if (XE_IOCTL_DBG(oa->xe, err)) 2173 - return err; 2174 - 2175 - if (ext.next_extension) 2176 - return xe_oa_user_extensions(oa, 
ext.next_extension, ++ext_number, param); 2177 - 2178 - return 0; 2179 - } 2180 - 2181 1641 /** 2182 1642 * xe_oa_stream_open_ioctl - Opens an OA stream 2183 1643 * @dev: @drm_device ··· 2054 1812 return -ENODEV; 2055 1813 } 2056 1814 2057 - ret = xe_oa_user_extensions(oa, data, 0, &param); 1815 + param.xef = xef; 1816 + ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param); 2058 1817 if (ret) 2059 1818 return ret; 2060 1819 ··· 2123 1880 drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); 2124 1881 } 2125 1882 1883 + ret = xe_oa_parse_syncs(oa, &param); 1884 + if (ret) 1885 + goto err_exec_q; 1886 + 2126 1887 mutex_lock(&param.hwe->gt->oa.gt_lock); 2127 1888 ret = xe_oa_stream_open_ioctl_locked(oa, &param); 2128 1889 mutex_unlock(&param.hwe->gt->oa.gt_lock); 1890 + if (ret < 0) 1891 + goto err_sync_cleanup; 1892 + 1893 + return ret; 1894 + 1895 + err_sync_cleanup: 1896 + while (param.num_syncs--) 1897 + xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); 1898 + kfree(param.syncs); 2129 1899 err_exec_q: 2130 - if (ret < 0 && param.exec_q) 1900 + if (param.exec_q) 2131 1901 xe_exec_queue_put(param.exec_q); 2132 1902 return ret; 2133 1903 }
+12
drivers/gpu/drm/xe/xe_oa_types.h
··· 238 238 239 239 /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ 240 240 u32 no_preempt; 241 + 242 + /** @xef: xe_file with which the stream was opened */ 243 + struct xe_file *xef; 244 + 245 + /** @last_fence: fence to use in stream destroy when needed */ 246 + struct dma_fence *last_fence; 247 + 248 + /** @num_syncs: size of @syncs array */ 249 + u32 num_syncs; 250 + 251 + /** @syncs: syncs to wait on and to signal */ 252 + struct xe_sync_entry *syncs; 241 253 }; 242 254 #endif
+30 -35
drivers/gpu/drm/xe/xe_pat.c
··· 182 182 static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) 183 183 { 184 184 struct xe_device *xe = gt_to_xe(gt); 185 - int i, err; 185 + unsigned int fw_ref; 186 + int i; 186 187 187 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 188 - if (err) 189 - goto err_fw; 188 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 189 + if (!fw_ref) 190 + return; 190 191 191 192 drm_printf(p, "PAT table:\n"); 192 193 ··· 199 198 XELP_MEM_TYPE_STR_MAP[mem_type], pat); 200 199 } 201 200 202 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 203 - err_fw: 204 - xe_assert(xe, !err); 201 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 205 202 } 206 203 207 204 static const struct xe_pat_ops xelp_pat_ops = { ··· 210 211 static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) 211 212 { 212 213 struct xe_device *xe = gt_to_xe(gt); 213 - int i, err; 214 + unsigned int fw_ref; 215 + int i; 214 216 215 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 216 - if (err) 217 - goto err_fw; 217 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 218 + if (!fw_ref) 219 + return; 218 220 219 221 drm_printf(p, "PAT table:\n"); 220 222 ··· 229 229 XELP_MEM_TYPE_STR_MAP[mem_type], pat); 230 230 } 231 231 232 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 233 - err_fw: 234 - xe_assert(xe, !err); 232 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 235 233 } 236 234 237 235 static const struct xe_pat_ops xehp_pat_ops = { ··· 240 242 static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) 241 243 { 242 244 struct xe_device *xe = gt_to_xe(gt); 243 - int i, err; 245 + unsigned int fw_ref; 246 + int i; 244 247 245 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 246 - if (err) 247 - goto err_fw; 248 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 249 + if (!fw_ref) 250 + return; 248 251 249 252 drm_printf(p, "PAT table:\n"); 250 253 ··· 257 258 REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); 258 259 } 259 260 260 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 
261 - err_fw: 262 - xe_assert(xe, !err); 261 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 263 262 } 264 263 265 264 static const struct xe_pat_ops xehpc_pat_ops = { ··· 268 271 static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) 269 272 { 270 273 struct xe_device *xe = gt_to_xe(gt); 271 - int i, err; 274 + unsigned int fw_ref; 275 + int i; 272 276 273 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 274 - if (err) 275 - goto err_fw; 277 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 278 + if (!fw_ref) 279 + return; 276 280 277 281 drm_printf(p, "PAT table:\n"); 278 282 ··· 290 292 REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); 291 293 } 292 294 293 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 294 - err_fw: 295 - xe_assert(xe, !err); 295 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 296 296 } 297 297 298 298 /* ··· 326 330 static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) 327 331 { 328 332 struct xe_device *xe = gt_to_xe(gt); 329 - int i, err; 333 + unsigned int fw_ref; 330 334 u32 pat; 335 + int i; 331 336 332 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 333 - if (err) 334 - goto err_fw; 337 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 338 + if (!fw_ref) 339 + return; 335 340 336 341 drm_printf(p, "PAT table:\n"); 337 342 ··· 371 374 REG_FIELD_GET(XE2_COH_MODE, pat), 372 375 pat); 373 376 374 - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 375 - err_fw: 376 - xe_assert(xe, !err); 377 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 377 378 } 378 379 379 380 static const struct xe_pat_ops xe2_pat_ops = {
+7 -3
drivers/gpu/drm/xe/xe_query.c
··· 117 117 __ktime_func_t cpu_clock; 118 118 struct xe_hw_engine *hwe; 119 119 struct xe_gt *gt; 120 + unsigned int fw_ref; 120 121 121 122 if (query->size == 0) { 122 123 query->size = size; ··· 150 149 if (!hwe) 151 150 return -EINVAL; 152 151 153 - if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) 152 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 153 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 154 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 154 155 return -EIO; 156 + } 155 157 156 158 hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, 157 159 &resp.cpu_delta, cpu_clock); 158 160 159 - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 161 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 160 162 161 163 if (GRAPHICS_VER(xe) >= 20) 162 164 resp.width = 64; ··· 670 666 du->oa_unit_id = u->oa_unit_id; 671 667 du->oa_unit_type = u->type; 672 668 du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 673 - du->capabilities = DRM_XE_OA_CAPS_BASE; 669 + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; 674 670 675 671 j = 0; 676 672 for_each_hw_engine(hwe, gt, hwe_id) {
+12 -12
drivers/gpu/drm/xe/xe_reg_sr.c
··· 188 188 { 189 189 struct xe_reg_sr_entry *entry; 190 190 unsigned long reg; 191 - int err; 191 + unsigned int fw_ref; 192 192 193 193 if (xa_empty(&sr->xa)) 194 194 return; 195 195 196 196 xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); 197 197 198 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 199 - if (err) 198 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 199 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 200 200 goto err_force_wake; 201 201 202 202 xa_for_each(&sr->xa, reg, entry) 203 203 apply_one_mmio(gt, entry); 204 204 205 - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 206 - XE_WARN_ON(err); 205 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 207 206 208 207 return; 209 208 210 209 err_force_wake: 211 - xe_gt_err(gt, "Failed to apply, err=%d\n", err); 210 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 211 + xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); 212 212 } 213 213 214 214 void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) ··· 221 221 u32 mmio_base = hwe->mmio_base; 222 222 unsigned long reg; 223 223 unsigned int slot = 0; 224 - int err; 224 + unsigned int fw_ref; 225 225 226 226 if (xa_empty(&sr->xa)) 227 227 return; 228 228 229 229 drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); 230 230 231 - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 232 - if (err) 231 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 232 + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 233 233 goto err_force_wake; 234 234 235 235 p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); ··· 254 254 xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); 255 255 } 256 256 257 - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 258 - XE_WARN_ON(err); 257 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 259 258 260 259 return; 261 260 262 261 err_force_wake: 263 - drm_err(&xe->drm, "Failed to apply, err=%d\n", err); 262 + 
xe_force_wake_put(gt_to_fw(gt), fw_ref); 263 + drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n"); 264 264 } 265 265 266 266 /**
+1 -1
drivers/gpu/drm/xe/xe_sched_job.c
··· 280 280 fence = &chain->base; 281 281 } 282 282 283 - job->fence = fence; 283 + job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */ 284 284 drm_sched_job_arm(&job->drm); 285 285 } 286 286
+1 -2
drivers/gpu/drm/xe/xe_sched_job_types.h
··· 40 40 * @fence: dma fence to indicate completion. 1 way relationship - job 41 41 * can safely reference fence, fence cannot safely reference job. 42 42 */ 43 - #define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS 44 43 struct dma_fence *fence; 45 44 /** @user_fence: write back value when BB is complete */ 46 45 struct { ··· 62 63 63 64 struct xe_sched_job_snapshot { 64 65 u16 batch_addr_len; 65 - u64 batch_addr[]; 66 + u64 batch_addr[] __counted_by(batch_addr_len); 66 67 }; 67 68 68 69 #endif
+2
drivers/gpu/drm/xe/xe_sync.c
··· 83 83 XE_WARN_ON("Copy to user failed"); 84 84 kthread_unuse_mm(ufence->mm); 85 85 mmput(ufence->mm); 86 + } else { 87 + drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n"); 86 88 } 87 89 88 90 wake_up_all(&ufence->xe->ufence_wq);
+7 -5
drivers/gpu/drm/xe/xe_vram.c
··· 220 220 { 221 221 struct xe_device *xe = tile_to_xe(tile); 222 222 struct xe_gt *gt = tile->primary_gt; 223 + unsigned int fw_ref; 223 224 u64 offset; 224 - int err; 225 225 u32 reg; 226 226 227 227 if (IS_SRIOV_VF(xe)) { ··· 240 240 return 0; 241 241 } 242 242 243 - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 244 - if (err) 245 - return err; 243 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 244 + if (!fw_ref) 245 + return -ETIMEDOUT; 246 246 247 247 /* actual size */ 248 248 if (unlikely(xe->info.platform == XE_DG1)) { ··· 264 264 /* remove the tile offset so we have just the available size */ 265 265 *vram_size = offset - *tile_offset; 266 266 267 - return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 267 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 268 + 269 + return 0; 268 270 } 269 271 270 272 static void vram_fini(void *arg)
+2
drivers/gpu/drm/xe/xe_wa_oob.rules
··· 39 39 14019789679 GRAPHICS_VERSION(1255) 40 40 GRAPHICS_VERSION_RANGE(1270, 2004) 41 41 no_media_l3 MEDIA_VERSION(3000) 42 + 14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) 43 + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
+17
include/uapi/drm/xe_drm.h
··· 1485 1485 /** @capabilities: OA capabilities bit-mask */ 1486 1486 __u64 capabilities; 1487 1487 #define DRM_XE_OA_CAPS_BASE (1 << 0) 1488 + #define DRM_XE_OA_CAPS_SYNCS (1 << 1) 1488 1489 1489 1490 /** @oa_timestamp_freq: OA timestamp freq */ 1490 1491 __u64 oa_timestamp_freq; ··· 1635 1634 * to be disabled for the stream exec queue. 1636 1635 */ 1637 1636 DRM_XE_OA_PROPERTY_NO_PREEMPT, 1637 + 1638 + /** 1639 + * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array 1640 + * specified in @DRM_XE_OA_PROPERTY_SYNCS 1641 + */ 1642 + DRM_XE_OA_PROPERTY_NUM_SYNCS, 1643 + 1644 + /** 1645 + * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array 1646 + * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA 1647 + * configuration will wait till input fences signal. Output fences 1648 + * will signal after the new OA configuration takes effect. For 1649 + * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar 1650 + * to the VM bind case. 1651 + */ 1652 + DRM_XE_OA_PROPERTY_SYNCS, 1638 1653 }; 1639 1654 1640 1655 /**