// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/dmi.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
 * ram) and S4 (suspend to disk). The main functions here are `xe_pm_suspend`
 * and `xe_pm_resume`. They are the main entry points for suspending to and
 * resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low
 * power state D3, controlled by the PCI subsystem and ACPI with help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power stays on to keep
 * memory alive for a quicker, lower-latency resume, or D3cold, where Vcc
 * power is off for better power savings.
 * Vcc for a PCI hierarchy can only be controlled at the PCI root port level,
 * while the device driver can be behind multiple bridges/switches and paired
 * with other devices. For this reason, the PCI subsystem cannot perform the
 * transition towards D3cold. The lowest runtime PM possible from the PCI
 * subsystem is D3hot. Then, if all the paired devices under the same root
 * port are in D3hot, ACPI will assist here and run its own methods (_PR3 and
 * _OFF) to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend, based on runtime conditions such as VRAM usage, to allow a quick
 * and low-latency resume, for instance.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows device
 * drivers to indicate when they can be runtime suspended, so the device can
 * be put into D3 (if supported), or allow deeper package sleep states
 * (PC-states) and/or other low level power states. The Xe PM component
 * provides `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that
 * the PCI subsystem calls on the transition to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver uses to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be
 * called from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
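 *
 * As an illustrative sketch only (foo_show(), foo_dev_to_xe() and
 * xe_foo_query() below are hypothetical, not part of this driver), an
 * outer-level entry point such as a sysfs show() callback is expected to wrap
 * the inner work in a get/put pair:
 *
 *	static ssize_t foo_show(struct device *dev,
 *				struct device_attribute *attr, char *buf)
 *	{
 *		struct xe_device *xe = foo_dev_to_xe(dev);
 *		ssize_t ret;
 *
 *		xe_pm_runtime_get(xe);
 *		ret = sysfs_emit(buf, "%u\n", xe_foo_query(xe));
 *		xe_pm_runtime_put(xe);
 *
 *		return ret;
 *	}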
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif

static void xe_pm_block_begin_signalling(void)
{
	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
}

static void xe_pm_block_end_signalling(void)
{
	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
}

/**
 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
 *
 * Annotation to use where the code might block or cease to make
 * progress pending resume completion.
 */
void xe_pm_might_block_on_suspend(void)
{
	lock_map_acquire(&xe_pm_block_lockdep_map);
	lock_map_release(&xe_pm_block_lockdep_map);
}

/**
 * xe_pm_block_on_suspend() - Block pending suspend.
 * @xe: The xe device about to be suspended.
 *
 * Block if the pm notifier has started evicting bos, to avoid
 * racing with it and validating those bos back. The function is
 * annotated to ensure no locks are held that are also grabbed
 * in the pm notifier or the device suspend / resume.
 * This is intended to be used by freezable tasks only
 * (not freezable workqueues), with the intention that the function
 * returns %-ERESTARTSYS when tasks are frozen during suspend,
 * allowing the task to freeze. The caller must be able to
 * handle the %-ERESTARTSYS.
 *
 * Return: %0 on success, %-ERESTARTSYS if a signal is pending or
 * freezing is requested.
 */
int xe_pm_block_on_suspend(struct xe_device *xe)
{
	xe_pm_might_block_on_suspend();

	return wait_for_completion_interruptible(&xe->pm_block);
}
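
/*
 * Purely illustrative sketch (no specific caller implied): a freezable task
 * calling into the driver is expected to simply propagate the error, e.g.:
 *
 *	err = xe_pm_block_on_suspend(xe);
 *	if (err)
 *		return err;
 *
 * so that %-ERESTARTSYS travels back up while suspend is in progress and the
 * task can freeze on its way out of the kernel.
 */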

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	xe_pm_block_begin_signalling();
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	xe_pm_block_end_signalling();

	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	xe_pm_block_begin_signalling();
	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe);

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	xe_pm_block_end_signalling();
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then extend
	 * this option to integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	if (xe->info.platform == XE_BATTLEMAGE) {
		const char *product_name;

		product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
		if (product_name && strstr(product_name, "NUC13RNG")) {
			drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n");
			return 0;
		}
	}

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
	{
		struct xe_validation_ctx ctx;

		reinit_completion(&xe->pm_block);
		xe_pm_block_begin_signalling();
		xe_pm_runtime_get(xe);
		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
					     (struct xe_val_flags) {.exclusive = true});
		err = xe_bo_evict_all_user(xe);
		xe_validation_ctx_fini(&ctx);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		xe_pm_block_end_signalling();
		break;
	}
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as an unreliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_pm_runtime_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress. With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
	 * also check and delete the bo entry from the user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_runtime_resume(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds). Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) should
 * be preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which
 * can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through
 * ACQUIRE(xe_pm_runtime_ioctl, ...)) should be preferred over direct usage of
 * this function. Manual get/put handling should only be used when the function
 * contains goto-based logic which can break scope-based handling, or when the
 * lifetime of the runtime PM reference does not match a specific scope (e.g.,
 * runtime PM obtained in one function and released in a different one).
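 *
 * A rough, illustrative sketch of the manual pattern (xe_foo_handle_ioctl()
 * and data are hypothetical); the put is unconditional here since the
 * underlying pm_runtime_get_sync() bumps the usage counter even on failure:
 *
 *	ret = xe_pm_runtime_get_ioctl(xe);
 *	if (ret >= 0)
 *		ret = xe_foo_handle_ioctl(xe, data);
 *	xe_pm_runtime_put(xe);
 *	return ret;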
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had already been
 * taken, and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should always be put back after this function, since it
 * always bumps the usage counter regardless.
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
 * should be preferred over direct usage of this function. Manual get/put
 * handling should only be used when the function contains goto-based logic
 * which can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
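 *
 * Purely illustrative calling pattern (no specific caller implied); on
 * failure no reference is held, so nothing must be put back:
 *
 *	if (!xe_pm_runtime_resume_and_get(xe))
 *		return;
 *	... access the hardware ...
 *	xe_pm_runtime_put(xe);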
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbound PCIe parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}