drivers/gpu/drm/amd/amdgpu/amdgpu_device.c at master · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
at master 216 kB view raw
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28
  29#include <linux/aperture.h>
  30#include <linux/power_supply.h>
  31#include <linux/kthread.h>
  32#include <linux/module.h>
  33#include <linux/console.h>
  34#include <linux/slab.h>
  35#include <linux/iommu.h>
  36#include <linux/pci.h>
  37#include <linux/pci-p2pdma.h>
  38#include <linux/apple-gmux.h>
  39
  40#include <drm/drm_atomic_helper.h>
  41#include <drm/drm_client_event.h>
  42#include <drm/drm_crtc_helper.h>
  43#include <drm/drm_probe_helper.h>
  44#include <drm/amdgpu_drm.h>
  45#include <linux/device.h>
  46#include <linux/vgaarb.h>
  47#include <linux/vga_switcheroo.h>
  48#include <linux/efi.h>
  49#include "amdgpu.h"
  50#include "amdgpu_trace.h"
  51#include "amdgpu_i2c.h"
  52#include "atom.h"
  53#include "amdgpu_atombios.h"
  54#include "amdgpu_atomfirmware.h"
  55#include "amd_pcie.h"
  56#ifdef CONFIG_DRM_AMDGPU_SI
  57#include "si.h"
  58#endif
  59#ifdef CONFIG_DRM_AMDGPU_CIK
  60#include "cik.h"
  61#endif
  62#include "vi.h"
  63#include "soc15.h"
  64#include "nv.h"
  65#include "bif/bif_4_1_d.h"
  66#include <linux/firmware.h>
  67#include "amdgpu_vf_error.h"
  68
  69#include "amdgpu_amdkfd.h"
  70#include "amdgpu_pm.h"
  71
  72#include "amdgpu_xgmi.h"
  73#include "amdgpu_ras.h"
  74#include "amdgpu_ras_mgr.h"
  75#include "amdgpu_pmu.h"
  76#include "amdgpu_fru_eeprom.h"
  77#include "amdgpu_reset.h"
  78#include "amdgpu_virt.h"
  79#include "amdgpu_dev_coredump.h"
  80
  81#include <linux/suspend.h>
  82#include <drm/task_barrier.h>
  83#include <linux/pm_runtime.h>
  84
  85#include <drm/drm_drv.h>
  86
  87#if IS_ENABLED(CONFIG_X86)
  88#include <asm/intel-family.h>
  89#include <asm/cpu_device_id.h>
  90#endif
  91
  92MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  93MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  94MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  95MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  96MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  97MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
  98MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  99MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
 100
 101#define AMDGPU_RESUME_MS		2000
 102#define AMDGPU_MAX_RETRY_LIMIT		2
 103#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
 104#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
 105#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
 106#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
 107
 108#define AMDGPU_VBIOS_SKIP (1U << 0)
 109#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
 110
 111static const struct drm_driver amdgpu_kms_driver;
 112
 113const char *amdgpu_asic_name[] = {
 114	"TAHITI",
 115	"PITCAIRN",
 116	"VERDE",
 117	"OLAND",
 118	"HAINAN",
 119	"BONAIRE",
 120	"KAVERI",
 121	"KABINI",
 122	"HAWAII",
 123	"MULLINS",
 124	"TOPAZ",
 125	"TONGA",
 126	"FIJI",
 127	"CARRIZO",
 128	"STONEY",
 129	"POLARIS10",
 130	"POLARIS11",
 131	"POLARIS12",
 132	"VEGAM",
 133	"VEGA10",
 134	"VEGA12",
 135	"VEGA20",
 136	"RAVEN",
 137	"ARCTURUS",
 138	"RENOIR",
 139	"ALDEBARAN",
 140	"NAVI10",
 141	"CYAN_SKILLFISH",
 142	"NAVI14",
 143	"NAVI12",
 144	"SIENNA_CICHLID",
 145	"NAVY_FLOUNDER",
 146	"VANGOGH",
 147	"DIMGREY_CAVEFISH",
 148	"BEIGE_GOBY",
 149	"YELLOW_CARP",
 150	"IP DISCOVERY",
 151	"LAST",
 152};
 153
 154#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
 155/*
 156 * Default init level where all blocks are expected to be initialized. This is
 157 * the level of initialization expected by default and also after a full reset
 158 * of the device.
 159 */
 160struct amdgpu_init_level amdgpu_init_default = {
 161	.level = AMDGPU_INIT_LEVEL_DEFAULT,
 162	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
 163};
 164
 165struct amdgpu_init_level amdgpu_init_recovery = {
 166	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
 167	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
 168};
 169
 170/*
 171 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 172 * is used for cases like reset on initialization where the entire hive needs to
 173 * be reset before first use.
 174 */
 175struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
 176	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
 177	.hwini_ip_block_mask =
 178		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
 179		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
 180		BIT(AMD_IP_BLOCK_TYPE_PSP)
 181};
 182
 183static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
 184static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
 185static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
 186
 187static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
 188
 189static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
 190					     enum amd_ip_block_type block)
 191{
 192	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
 193}
 194
 195void amdgpu_set_init_level(struct amdgpu_device *adev,
 196			   enum amdgpu_init_lvl_id lvl)
 197{
 198	switch (lvl) {
 199	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
 200		adev->init_lvl = &amdgpu_init_minimal_xgmi;
 201		break;
 202	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
 203		adev->init_lvl = &amdgpu_init_recovery;
 204		break;
 205	case AMDGPU_INIT_LEVEL_DEFAULT:
 206		fallthrough;
 207	default:
 208		adev->init_lvl = &amdgpu_init_default;
 209		break;
 210	}
 211}
 212
 213static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
 214static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
 215				     void *data);
 216
 217/**
 218 * DOC: pcie_replay_count
 219 *
 220 * The amdgpu driver provides a sysfs API for reporting the total number
 221 * of PCIe replays (NAKs).
 222 * The file pcie_replay_count is used for this and returns the total
 223 * number of replays as a sum of the NAKs generated and NAKs received.
 224 */
 225
 226static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 227		struct device_attribute *attr, char *buf)
 228{
 229	struct drm_device *ddev = dev_get_drvdata(dev);
 230	struct amdgpu_device *adev = drm_to_adev(ddev);
 231	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 232
 233	return sysfs_emit(buf, "%llu\n", cnt);
 234}
 235
 236static DEVICE_ATTR(pcie_replay_count, 0444,
 237		amdgpu_device_get_pcie_replay_count, NULL);
 238
 239static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
 240{
 241	int ret = 0;
 242
 243	if (amdgpu_nbio_is_replay_cnt_supported(adev))
 244		ret = sysfs_create_file(&adev->dev->kobj,
 245					&dev_attr_pcie_replay_count.attr);
 246
 247	return ret;
 248}
 249
 250static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
 251{
 252	if (amdgpu_nbio_is_replay_cnt_supported(adev))
 253		sysfs_remove_file(&adev->dev->kobj,
 254				  &dev_attr_pcie_replay_count.attr);
 255}
 256
 257static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
 258					  const struct bin_attribute *attr, char *buf,
 259					  loff_t ppos, size_t count)
 260{
 261	struct device *dev = kobj_to_dev(kobj);
 262	struct drm_device *ddev = dev_get_drvdata(dev);
 263	struct amdgpu_device *adev = drm_to_adev(ddev);
 264	ssize_t bytes_read;
 265
 266	switch (ppos) {
 267	case AMDGPU_SYS_REG_STATE_XGMI:
 268		bytes_read = amdgpu_asic_get_reg_state(
 269			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
 270		break;
 271	case AMDGPU_SYS_REG_STATE_WAFL:
 272		bytes_read = amdgpu_asic_get_reg_state(
 273			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
 274		break;
 275	case AMDGPU_SYS_REG_STATE_PCIE:
 276		bytes_read = amdgpu_asic_get_reg_state(
 277			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
 278		break;
 279	case AMDGPU_SYS_REG_STATE_USR:
 280		bytes_read = amdgpu_asic_get_reg_state(
 281			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
 282		break;
 283	case AMDGPU_SYS_REG_STATE_USR_1:
 284		bytes_read = amdgpu_asic_get_reg_state(
 285			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
 286		break;
 287	default:
 288		return -EINVAL;
 289	}
 290
 291	return bytes_read;
 292}
 293
 294static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
 295		      AMDGPU_SYS_REG_STATE_END);
 296
 297int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
 298{
 299	int ret;
 300
 301	if (!amdgpu_asic_get_reg_state_supported(adev))
 302		return 0;
 303
 304	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
 305
 306	return ret;
 307}
 308
 309void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
 310{
 311	if (!amdgpu_asic_get_reg_state_supported(adev))
 312		return;
 313	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
 314}
 315
 316int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
 317{
 318	int r;
 319
 320	if (ip_block->version->funcs->suspend) {
 321		r = ip_block->version->funcs->suspend(ip_block);
 322		if (r) {
 323			dev_err(ip_block->adev->dev,
 324				"suspend of IP block <%s> failed %d\n",
 325				ip_block->version->funcs->name, r);
 326			return r;
 327		}
 328	}
 329
 330	ip_block->status.hw = false;
 331	return 0;
 332}
 333
 334int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
 335{
 336	int r;
 337
 338	if (ip_block->version->funcs->resume) {
 339		r = ip_block->version->funcs->resume(ip_block);
 340		if (r) {
 341			dev_err(ip_block->adev->dev,
 342				"resume of IP block <%s> failed %d\n",
 343				ip_block->version->funcs->name, r);
 344			return r;
 345		}
 346	}
 347
 348	ip_block->status.hw = true;
 349	return 0;
 350}
 351
 352/**
 353 * DOC: board_info
 354 *
 355 * The amdgpu driver provides a sysfs API for giving board related information.
 356 * It provides the form factor information in the format
 357 *
 358 *   type : form factor
 359 *
 360 * Possible form factor values
 361 *
 362 * - "cem"		- PCIE CEM card
 363 * - "oam"		- Open Compute Accelerator Module
 364 * - "unknown"	- Not known
 365 *
 366 */
 367
 368static ssize_t amdgpu_device_get_board_info(struct device *dev,
 369					    struct device_attribute *attr,
 370					    char *buf)
 371{
 372	struct drm_device *ddev = dev_get_drvdata(dev);
 373	struct amdgpu_device *adev = drm_to_adev(ddev);
 374	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
 375	const char *pkg;
 376
 377	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
 378		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
 379
 380	switch (pkg_type) {
 381	case AMDGPU_PKG_TYPE_CEM:
 382		pkg = "cem";
 383		break;
 384	case AMDGPU_PKG_TYPE_OAM:
 385		pkg = "oam";
 386		break;
 387	default:
 388		pkg = "unknown";
 389		break;
 390	}
 391
 392	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
 393}
 394
 395static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
 396
 397static struct attribute *amdgpu_board_attrs[] = {
 398	&dev_attr_board_info.attr,
 399	NULL,
 400};
 401
 402static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
 403					     struct attribute *attr, int n)
 404{
 405	struct device *dev = kobj_to_dev(kobj);
 406	struct drm_device *ddev = dev_get_drvdata(dev);
 407	struct amdgpu_device *adev = drm_to_adev(ddev);
 408
 409	if (adev->flags & AMD_IS_APU)
 410		return 0;
 411
 412	return attr->mode;
 413}
 414
 415static const struct attribute_group amdgpu_board_attrs_group = {
 416	.attrs = amdgpu_board_attrs,
 417	.is_visible = amdgpu_board_attrs_is_visible
 418};
 419
 420static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 421
 422/**
 423 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 424 *
 425 * @adev: amdgpu device pointer
 426 *
 427 * Returns true if the device is a dGPU with ATPX power control,
 428 * otherwise return false.
 429 */
 430bool amdgpu_device_supports_px(struct amdgpu_device *adev)
 431{
 432	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
 433		return true;
 434	return false;
 435}
 436
 437/**
 438 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 439 *
 440 * @adev: amdgpu device pointer
 441 *
 442 * Returns true if the device is a dGPU with ACPI power control,
 443 * otherwise return false.
 444 */
 445bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
 446{
 447	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
 448		return false;
 449
 450	if (adev->has_pr3 ||
 451	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
 452		return true;
 453	return false;
 454}
 455
 456/**
 457 * amdgpu_device_supports_baco - Does the device support BACO
 458 *
 459 * @adev: amdgpu device pointer
 460 *
 461 * Return:
 462 * 1 if the device supports BACO;
 463 * 3 if the device supports MACO (only works if BACO is supported)
 464 * otherwise return 0.
 465 */
 466int amdgpu_device_supports_baco(struct amdgpu_device *adev)
 467{
 468	return amdgpu_asic_supports_baco(adev);
 469}
 470
 471void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
 472{
 473	int bamaco_support;
 474
 475	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
 476	bamaco_support = amdgpu_device_supports_baco(adev);
 477
 478	switch (amdgpu_runtime_pm) {
 479	case 2:
 480		if (bamaco_support & MACO_SUPPORT) {
 481			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
 482			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
 483		} else if (bamaco_support == BACO_SUPPORT) {
 484			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 485			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
 486		}
 487		break;
 488	case 1:
 489		if (bamaco_support & BACO_SUPPORT) {
 490			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 491			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
 492		}
 493		break;
 494	case -1:
 495	case -2:
 496		if (amdgpu_device_supports_px(adev)) {
 497			/* enable PX as runtime mode */
 498			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
 499			dev_info(adev->dev, "Using ATPX for runtime pm\n");
 500		} else if (amdgpu_device_supports_boco(adev)) {
 501			/* enable boco as runtime mode */
 502			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
 503			dev_info(adev->dev, "Using BOCO for runtime pm\n");
 504		} else {
 505			if (!bamaco_support)
 506				goto no_runtime_pm;
 507
 508			switch (adev->asic_type) {
 509			case CHIP_VEGA20:
 510			case CHIP_ARCTURUS:
 511				/* BACO are not supported on vega20 and arctrus */
 512				break;
 513			case CHIP_VEGA10:
 514				/* enable BACO as runpm mode if noretry=0 */
 515				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
 516					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 517				break;
 518			default:
 519				/* enable BACO as runpm mode on CI+ */
 520				if (!amdgpu_passthrough(adev))
 521					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 522				break;
 523			}
 524
 525			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
 526				if (bamaco_support & MACO_SUPPORT) {
 527					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
 528					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
 529				} else {
 530					dev_info(adev->dev, "Using BACO for runtime pm\n");
 531				}
 532			}
 533		}
 534		break;
 535	case 0:
 536		dev_info(adev->dev, "runtime pm is manually disabled\n");
 537		break;
 538	default:
 539		break;
 540	}
 541
 542no_runtime_pm:
 543	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
 544		dev_info(adev->dev, "Runtime PM not available\n");
 545}
 546/**
 547 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 548 * smart shift support
 549 *
 550 * @adev: amdgpu device pointer
 551 *
 552 * Returns true if the device is a dGPU with Smart Shift support,
 553 * otherwise returns false.
 554 */
 555bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
 556{
 557	return (amdgpu_device_supports_boco(adev) &&
 558		amdgpu_acpi_is_power_shift_control_supported());
 559}
 560
 561/*
 562 * VRAM access helper functions
 563 */
 564
 565/**
 566 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 567 *
 568 * @adev: amdgpu_device pointer
 569 * @pos: offset of the buffer in vram
 570 * @buf: virtual address of the buffer in system memory
 571 * @size: read/write size, sizeof(@buf) must > @size
 572 * @write: true - write to vram, otherwise - read from vram
 573 */
 574void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
 575			     void *buf, size_t size, bool write)
 576{
 577	unsigned long flags;
 578	uint32_t hi = ~0, tmp = 0;
 579	uint32_t *data = buf;
 580	uint64_t last;
 581	int idx;
 582
 583	if (!drm_dev_enter(adev_to_drm(adev), &idx))
 584		return;
 585
 586	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
 587
 588	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 589	for (last = pos + size; pos < last; pos += 4) {
 590		tmp = pos >> 31;
 591
 592		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
 593		if (tmp != hi) {
 594			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
 595			hi = tmp;
 596		}
 597		if (write)
 598			WREG32_NO_KIQ(mmMM_DATA, *data++);
 599		else
 600			*data++ = RREG32_NO_KIQ(mmMM_DATA);
 601	}
 602
 603	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 604	drm_dev_exit(idx);
 605}
 606
 607/**
 608 * amdgpu_device_aper_access - access vram by vram aperture
 609 *
 610 * @adev: amdgpu_device pointer
 611 * @pos: offset of the buffer in vram
 612 * @buf: virtual address of the buffer in system memory
 613 * @size: read/write size, sizeof(@buf) must > @size
 614 * @write: true - write to vram, otherwise - read from vram
 615 *
 616 * The return value means how many bytes have been transferred.
 617 */
 618size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
 619				 void *buf, size_t size, bool write)
 620{
 621#ifdef CONFIG_64BIT
 622	void __iomem *addr;
 623	size_t count = 0;
 624	uint64_t last;
 625
 626	if (!adev->mman.aper_base_kaddr)
 627		return 0;
 628
 629	last = min(pos + size, adev->gmc.visible_vram_size);
 630	if (last > pos) {
 631		addr = adev->mman.aper_base_kaddr + pos;
 632		count = last - pos;
 633
 634		if (write) {
 635			memcpy_toio(addr, buf, count);
 636			/* Make sure HDP write cache flush happens without any reordering
 637			 * after the system memory contents are sent over PCIe device
 638			 */
 639			mb();
 640			amdgpu_device_flush_hdp(adev, NULL);
 641		} else {
 642			amdgpu_device_invalidate_hdp(adev, NULL);
 643			/* Make sure HDP read cache is invalidated before issuing a read
 644			 * to the PCIe device
 645			 */
 646			mb();
 647			memcpy_fromio(buf, addr, count);
 648		}
 649
 650	}
 651
 652	return count;
 653#else
 654	return 0;
 655#endif
 656}
 657
 658/**
 659 * amdgpu_device_vram_access - read/write a buffer in vram
 660 *
 661 * @adev: amdgpu_device pointer
 662 * @pos: offset of the buffer in vram
 663 * @buf: virtual address of the buffer in system memory
 664 * @size: read/write size, sizeof(@buf) must > @size
 665 * @write: true - write to vram, otherwise - read from vram
 666 */
 667void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 668			       void *buf, size_t size, bool write)
 669{
 670	size_t count;
 671
 672	/* try to using vram apreature to access vram first */
 673	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
 674	size -= count;
 675	if (size) {
 676		/* using MM to access rest vram */
 677		pos += count;
 678		buf += count;
 679		amdgpu_device_mm_access(adev, pos, buf, size, write);
 680	}
 681}
 682
 683/*
 684 * register access helper functions.
 685 */
 686
 687/* Check if hw access should be skipped because of hotplug or device error */
 688bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
 689{
 690	if (adev->no_hw_access)
 691		return true;
 692
 693#ifdef CONFIG_LOCKDEP
 694	/*
 695	 * This is a bit complicated to understand, so worth a comment. What we assert
 696	 * here is that the GPU reset is not running on another thread in parallel.
 697	 *
 698	 * For this we trylock the read side of the reset semaphore, if that succeeds
 699	 * we know that the reset is not running in parallel.
 700	 *
 701	 * If the trylock fails we assert that we are either already holding the read
 702	 * side of the lock or are the reset thread itself and hold the write side of
 703	 * the lock.
 704	 */
 705	if (in_task()) {
 706		if (down_read_trylock(&adev->reset_domain->sem))
 707			up_read(&adev->reset_domain->sem);
 708		else
 709			lockdep_assert_held(&adev->reset_domain->sem);
 710	}
 711#endif
 712	return false;
 713}
 714
 715/**
 716 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 717 *
 718 * @adev: amdgpu_device pointer
 719 * @reg: dword aligned register offset
 720 * @acc_flags: access flags which require special behavior
 721 *
 722 * Returns the 32 bit value from the offset specified.
 723 */
 724uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 725			    uint32_t reg, uint32_t acc_flags)
 726{
 727	uint32_t ret;
 728
 729	if (amdgpu_device_skip_hw_access(adev))
 730		return 0;
 731
 732	if ((reg * 4) < adev->rmmio_size) {
 733		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 734		    amdgpu_sriov_runtime(adev) &&
 735		    down_read_trylock(&adev->reset_domain->sem)) {
 736			ret = amdgpu_kiq_rreg(adev, reg, 0);
 737			up_read(&adev->reset_domain->sem);
 738		} else {
 739			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 740		}
 741	} else {
 742		ret = adev->pcie_rreg(adev, reg * 4);
 743	}
 744
 745	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
 746
 747	return ret;
 748}
 749
 750/*
 751 * MMIO register read with bytes helper functions
 752 * @offset:bytes offset from MMIO start
 753 */
 754
 755/**
 756 * amdgpu_mm_rreg8 - read a memory mapped IO register
 757 *
 758 * @adev: amdgpu_device pointer
 759 * @offset: byte aligned register offset
 760 *
 761 * Returns the 8 bit value from the offset specified.
 762 */
 763uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
 764{
 765	if (amdgpu_device_skip_hw_access(adev))
 766		return 0;
 767
 768	if (offset < adev->rmmio_size)
 769		return (readb(adev->rmmio + offset));
 770	BUG();
 771}
 772
 773
 774/**
 775 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 776 *
 777 * @adev: amdgpu_device pointer
 778 * @reg: dword aligned register offset
 779 * @acc_flags: access flags which require special behavior
 780 * @xcc_id: xcc accelerated compute core id
 781 *
 782 * Returns the 32 bit value from the offset specified.
 783 */
 784uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
 785				uint32_t reg, uint32_t acc_flags,
 786				uint32_t xcc_id)
 787{
 788	uint32_t ret, rlcg_flag;
 789
 790	if (amdgpu_device_skip_hw_access(adev))
 791		return 0;
 792
 793	if ((reg * 4) < adev->rmmio_size) {
 794		if (amdgpu_sriov_vf(adev) &&
 795		    !amdgpu_sriov_runtime(adev) &&
 796		    adev->gfx.rlc.rlcg_reg_access_supported &&
 797		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
 798							 GC_HWIP, false,
 799							 &rlcg_flag)) {
 800			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
 801		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 802		    amdgpu_sriov_runtime(adev) &&
 803		    down_read_trylock(&adev->reset_domain->sem)) {
 804			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
 805			up_read(&adev->reset_domain->sem);
 806		} else {
 807			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 808		}
 809	} else {
 810		ret = adev->pcie_rreg(adev, reg * 4);
 811	}
 812
 813	return ret;
 814}
 815
 816/*
 817 * MMIO register write with bytes helper functions
 818 * @offset:bytes offset from MMIO start
  819 * @value: the value to be written to the register
 820 */
 821
 822/**
 823 * amdgpu_mm_wreg8 - read a memory mapped IO register
 824 *
 825 * @adev: amdgpu_device pointer
 826 * @offset: byte aligned register offset
 827 * @value: 8 bit value to write
 828 *
 829 * Writes the value specified to the offset specified.
 830 */
 831void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 832{
 833	if (amdgpu_device_skip_hw_access(adev))
 834		return;
 835
 836	if (offset < adev->rmmio_size)
 837		writeb(value, adev->rmmio + offset);
 838	else
 839		BUG();
 840}
 841
 842/**
 843 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 844 *
 845 * @adev: amdgpu_device pointer
 846 * @reg: dword aligned register offset
 847 * @v: 32 bit value to write to the register
 848 * @acc_flags: access flags which require special behavior
 849 *
 850 * Writes the value specified to the offset specified.
 851 */
 852void amdgpu_device_wreg(struct amdgpu_device *adev,
 853			uint32_t reg, uint32_t v,
 854			uint32_t acc_flags)
 855{
 856	if (amdgpu_device_skip_hw_access(adev))
 857		return;
 858
 859	if ((reg * 4) < adev->rmmio_size) {
 860		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 861		    amdgpu_sriov_runtime(adev) &&
 862		    down_read_trylock(&adev->reset_domain->sem)) {
 863			amdgpu_kiq_wreg(adev, reg, v, 0);
 864			up_read(&adev->reset_domain->sem);
 865		} else {
 866			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 867		}
 868	} else {
 869		adev->pcie_wreg(adev, reg * 4, v);
 870	}
 871
 872	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
 873}
 874
 875/**
 876 * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
 877 *
 878 * @adev: amdgpu_device pointer
 879 * @reg: mmio/rlc register
 880 * @v: value to write
 881 * @xcc_id: xcc accelerated compute core id
 882 *
 883 * this function is invoked only for the debugfs register access
 884 */
 885void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 886			     uint32_t reg, uint32_t v,
 887			     uint32_t xcc_id)
 888{
 889	if (amdgpu_device_skip_hw_access(adev))
 890		return;
 891
 892	if (amdgpu_sriov_fullaccess(adev) &&
 893	    adev->gfx.rlc.funcs &&
 894	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
 895		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
 896			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
 897	} else if ((reg * 4) >= adev->rmmio_size) {
 898		adev->pcie_wreg(adev, reg * 4, v);
 899	} else {
 900		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 901	}
 902}
 903
 904/**
 905 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 906 *
 907 * @adev: amdgpu_device pointer
 908 * @reg: dword aligned register offset
 909 * @v: 32 bit value to write to the register
 910 * @acc_flags: access flags which require special behavior
 911 * @xcc_id: xcc accelerated compute core id
 912 *
 913 * Writes the value specified to the offset specified.
 914 */
 915void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
 916			uint32_t reg, uint32_t v,
 917			uint32_t acc_flags, uint32_t xcc_id)
 918{
 919	uint32_t rlcg_flag;
 920
 921	if (amdgpu_device_skip_hw_access(adev))
 922		return;
 923
 924	if ((reg * 4) < adev->rmmio_size) {
 925		if (amdgpu_sriov_vf(adev) &&
 926		    !amdgpu_sriov_runtime(adev) &&
 927		    adev->gfx.rlc.rlcg_reg_access_supported &&
 928		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
 929							 GC_HWIP, true,
 930							 &rlcg_flag)) {
 931			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
 932		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 933		    amdgpu_sriov_runtime(adev) &&
 934		    down_read_trylock(&adev->reset_domain->sem)) {
 935			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
 936			up_read(&adev->reset_domain->sem);
 937		} else {
 938			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 939		}
 940	} else {
 941		adev->pcie_wreg(adev, reg * 4, v);
 942	}
 943}
 944
 945/**
 946 * amdgpu_device_indirect_rreg - read an indirect register
 947 *
 948 * @adev: amdgpu_device pointer
 949 * @reg_addr: indirect register address to read from
 950 *
 951 * Returns the value of indirect register @reg_addr
 952 */
 953u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
 954				u32 reg_addr)
 955{
 956	unsigned long flags, pcie_index, pcie_data;
 957	void __iomem *pcie_index_offset;
 958	void __iomem *pcie_data_offset;
 959	u32 r;
 960
 961	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 962	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 963
 964	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 965	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 966	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 967
 968	writel(reg_addr, pcie_index_offset);
 969	readl(pcie_index_offset);
 970	r = readl(pcie_data_offset);
 971	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 972
 973	return r;
 974}
 975
 976u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
 977				    u64 reg_addr)
 978{
 979	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
 980	u32 r;
 981	void __iomem *pcie_index_offset;
 982	void __iomem *pcie_index_hi_offset;
 983	void __iomem *pcie_data_offset;
 984
 985	if (unlikely(!adev->nbio.funcs)) {
 986		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
 987		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
 988	} else {
 989		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 990		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 991	}
 992
 993	if (reg_addr >> 32) {
 994		if (unlikely(!adev->nbio.funcs))
 995			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
 996		else
 997			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
 998	} else {
 999		pcie_index_hi = 0;
1000	}
1001
1002	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1003	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1004	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1005	if (pcie_index_hi != 0)
1006		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1007				pcie_index_hi * 4;
1008
1009	writel(reg_addr, pcie_index_offset);
1010	readl(pcie_index_offset);
1011	if (pcie_index_hi != 0) {
1012		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1013		readl(pcie_index_hi_offset);
1014	}
1015	r = readl(pcie_data_offset);
1016
1017	/* clear the high bits */
1018	if (pcie_index_hi != 0) {
1019		writel(0, pcie_index_hi_offset);
1020		readl(pcie_index_hi_offset);
1021	}
1022
1023	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1024
1025	return r;
1026}
1027
1028/**
1029 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
1030 *
1031 * @adev: amdgpu_device pointer
1032 * @reg_addr: indirect register address to read from
1033 *
1034 * Returns the value of indirect register @reg_addr
1035 */
1036u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1037				  u32 reg_addr)
1038{
1039	unsigned long flags, pcie_index, pcie_data;
1040	void __iomem *pcie_index_offset;
1041	void __iomem *pcie_data_offset;
1042	u64 r;
1043
1044	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1045	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1046
1047	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1048	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1049	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1050
1051	/* read low 32 bits */
1052	writel(reg_addr, pcie_index_offset);
1053	readl(pcie_index_offset);
1054	r = readl(pcie_data_offset);
1055	/* read high 32 bits */
1056	writel(reg_addr + 4, pcie_index_offset);
1057	readl(pcie_index_offset);
1058	r |= ((u64)readl(pcie_data_offset) << 32);
1059	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1060
1061	return r;
1062}
1063
1064u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1065				  u64 reg_addr)
1066{
1067	unsigned long flags, pcie_index, pcie_data;
1068	unsigned long pcie_index_hi = 0;
1069	void __iomem *pcie_index_offset;
1070	void __iomem *pcie_index_hi_offset;
1071	void __iomem *pcie_data_offset;
1072	u64 r;
1073
1074	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1075	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1076	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1077		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1078
1079	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1080	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1081	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1082	if (pcie_index_hi != 0)
1083		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1084			pcie_index_hi * 4;
1085
1086	/* read low 32 bits */
1087	writel(reg_addr, pcie_index_offset);
1088	readl(pcie_index_offset);
1089	if (pcie_index_hi != 0) {
1090		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1091		readl(pcie_index_hi_offset);
1092	}
1093	r = readl(pcie_data_offset);
1094	/* read high 32 bits */
1095	writel(reg_addr + 4, pcie_index_offset);
1096	readl(pcie_index_offset);
1097	if (pcie_index_hi != 0) {
1098		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1099		readl(pcie_index_hi_offset);
1100	}
1101	r |= ((u64)readl(pcie_data_offset) << 32);
1102
1103	/* clear the high bits */
1104	if (pcie_index_hi != 0) {
1105		writel(0, pcie_index_hi_offset);
1106		readl(pcie_index_hi_offset);
1107	}
1108
1109	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1110
1111	return r;
1112}
1113
1114/**
1115 * amdgpu_device_indirect_wreg - write an indirect register address
1116 *
1117 * @adev: amdgpu_device pointer
1118 * @reg_addr: indirect register offset
1119 * @reg_data: indirect register data
1120 *
1121 */
1122void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1123				 u32 reg_addr, u32 reg_data)
1124{
1125	unsigned long flags, pcie_index, pcie_data;
1126	void __iomem *pcie_index_offset;
1127	void __iomem *pcie_data_offset;
1128
1129	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1130	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1131
1132	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1133	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1134	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1135
1136	writel(reg_addr, pcie_index_offset);
1137	readl(pcie_index_offset);
1138	writel(reg_data, pcie_data_offset);
1139	readl(pcie_data_offset);
1140	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1141}
1142
1143void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1144				     u64 reg_addr, u32 reg_data)
1145{
1146	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1147	void __iomem *pcie_index_offset;
1148	void __iomem *pcie_index_hi_offset;
1149	void __iomem *pcie_data_offset;
1150
1151	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1152	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1153	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1154		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1155	else
1156		pcie_index_hi = 0;
1157
1158	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1159	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1160	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1161	if (pcie_index_hi != 0)
1162		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1163				pcie_index_hi * 4;
1164
1165	writel(reg_addr, pcie_index_offset);
1166	readl(pcie_index_offset);
1167	if (pcie_index_hi != 0) {
1168		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1169		readl(pcie_index_hi_offset);
1170	}
1171	writel(reg_data, pcie_data_offset);
1172	readl(pcie_data_offset);
1173
1174	/* clear the high bits */
1175	if (pcie_index_hi != 0) {
1176		writel(0, pcie_index_hi_offset);
1177		readl(pcie_index_hi_offset);
1178	}
1179
1180	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1181}
1182
1183/**
1184 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1185 *
1186 * @adev: amdgpu_device pointer
1187 * @reg_addr: indirect register offset
1188 * @reg_data: indirect register data
1189 *
1190 */
1191void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1192				   u32 reg_addr, u64 reg_data)
1193{
1194	unsigned long flags, pcie_index, pcie_data;
1195	void __iomem *pcie_index_offset;
1196	void __iomem *pcie_data_offset;
1197
1198	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1199	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1200
1201	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1202	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1203	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1204
1205	/* write low 32 bits */
1206	writel(reg_addr, pcie_index_offset);
1207	readl(pcie_index_offset);
1208	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1209	readl(pcie_data_offset);
1210	/* write high 32 bits */
1211	writel(reg_addr + 4, pcie_index_offset);
1212	readl(pcie_index_offset);
1213	writel((u32)(reg_data >> 32), pcie_data_offset);
1214	readl(pcie_data_offset);
1215	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1216}
1217
1218void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1219				   u64 reg_addr, u64 reg_data)
1220{
1221	unsigned long flags, pcie_index, pcie_data;
1222	unsigned long pcie_index_hi = 0;
1223	void __iomem *pcie_index_offset;
1224	void __iomem *pcie_index_hi_offset;
1225	void __iomem *pcie_data_offset;
1226
1227	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1228	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1229	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1230		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1231
1232	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1233	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1234	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1235	if (pcie_index_hi != 0)
1236		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1237				pcie_index_hi * 4;
1238
1239	/* write low 32 bits */
1240	writel(reg_addr, pcie_index_offset);
1241	readl(pcie_index_offset);
1242	if (pcie_index_hi != 0) {
1243		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1244		readl(pcie_index_hi_offset);
1245	}
1246	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1247	readl(pcie_data_offset);
1248	/* write high 32 bits */
1249	writel(reg_addr + 4, pcie_index_offset);
1250	readl(pcie_index_offset);
1251	if (pcie_index_hi != 0) {
1252		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1253		readl(pcie_index_hi_offset);
1254	}
1255	writel((u32)(reg_data >> 32), pcie_data_offset);
1256	readl(pcie_data_offset);
1257
1258	/* clear the high bits */
1259	if (pcie_index_hi != 0) {
1260		writel(0, pcie_index_hi_offset);
1261		readl(pcie_index_hi_offset);
1262	}
1263
1264	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1265}
1266
1267/**
1268 * amdgpu_device_get_rev_id - query device rev_id
1269 *
1270 * @adev: amdgpu_device pointer
1271 *
1272 * Return device rev_id
1273 */
1274u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1275{
1276	return adev->nbio.funcs->get_rev_id(adev);
1277}
1278
1279/**
1280 * amdgpu_invalid_rreg - dummy reg read function
1281 *
1282 * @adev: amdgpu_device pointer
1283 * @reg: offset of register
1284 *
1285 * Dummy register read function.  Used for register blocks
1286 * that certain asics don't have (all asics).
1287 * Returns the value in the register.
1288 */
1289static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1290{
1291	dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
1292	BUG();
1293	return 0;
1294}
1295
1296static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1297{
1298	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
1299	BUG();
1300	return 0;
1301}
1302
1303/**
1304 * amdgpu_invalid_wreg - dummy reg write function
1305 *
1306 * @adev: amdgpu_device pointer
1307 * @reg: offset of register
1308 * @v: value to write to the register
1309 *
1310 * Dummy register read function.  Used for register blocks
1311 * that certain asics don't have (all asics).
1312 */
1313static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1314{
1315	dev_err(adev->dev,
1316		"Invalid callback to write register 0x%04X with 0x%08X\n", reg,
1317		v);
1318	BUG();
1319}
1320
1321static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1322{
1323	dev_err(adev->dev,
1324		"Invalid callback to write register 0x%llX with 0x%08X\n", reg,
1325		v);
1326	BUG();
1327}
1328
1329/**
1330 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1331 *
1332 * @adev: amdgpu_device pointer
1333 * @reg: offset of register
1334 *
1335 * Dummy register read function.  Used for register blocks
1336 * that certain asics don't have (all asics).
1337 * Returns the value in the register.
1338 */
1339static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1340{
1341	dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
1342		reg);
1343	BUG();
1344	return 0;
1345}
1346
1347static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1348{
1349	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
1350	BUG();
1351	return 0;
1352}
1353
1354/**
1355 * amdgpu_invalid_wreg64 - dummy reg write function
1356 *
1357 * @adev: amdgpu_device pointer
1358 * @reg: offset of register
1359 * @v: value to write to the register
1360 *
1361 * Dummy register read function.  Used for register blocks
1362 * that certain asics don't have (all asics).
1363 */
1364static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1365{
1366	dev_err(adev->dev,
1367		"Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1368		reg, v);
1369	BUG();
1370}
1371
1372static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1373{
1374	dev_err(adev->dev,
1375		"Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1376		reg, v);
1377	BUG();
1378}
1379
1380/**
1381 * amdgpu_block_invalid_rreg - dummy reg read function
1382 *
1383 * @adev: amdgpu_device pointer
1384 * @block: offset of instance
1385 * @reg: offset of register
1386 *
1387 * Dummy register read function.  Used for register blocks
1388 * that certain asics don't have (all asics).
1389 * Returns the value in the register.
1390 */
1391static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1392					  uint32_t block, uint32_t reg)
1393{
1394	dev_err(adev->dev,
1395		"Invalid callback to read register 0x%04X in block 0x%04X\n",
1396		reg, block);
1397	BUG();
1398	return 0;
1399}
1400
1401/**
1402 * amdgpu_block_invalid_wreg - dummy reg write function
1403 *
1404 * @adev: amdgpu_device pointer
1405 * @block: offset of instance
1406 * @reg: offset of register
1407 * @v: value to write to the register
1408 *
1409 * Dummy register read function.  Used for register blocks
1410 * that certain asics don't have (all asics).
1411 */
1412static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1413				      uint32_t block,
1414				      uint32_t reg, uint32_t v)
1415{
1416	dev_err(adev->dev,
1417		"Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1418		reg, block, v);
1419	BUG();
1420}
1421
1422static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
1423{
1424	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1425		return AMDGPU_VBIOS_SKIP;
1426
1427	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
1428		return AMDGPU_VBIOS_OPTIONAL;
1429
1430	return 0;
1431}
1432
1433/**
1434 * amdgpu_device_asic_init - Wrapper for atom asic_init
1435 *
1436 * @adev: amdgpu_device pointer
1437 *
1438 * Does any asic specific work and then calls atom asic init.
1439 */
1440static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1441{
1442	uint32_t flags;
1443	bool optional;
1444	int ret;
1445
1446	amdgpu_asic_pre_asic_init(adev);
1447	flags = amdgpu_device_get_vbios_flags(adev);
1448	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
1449
1450	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1451	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1452	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
1453	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1454		amdgpu_psp_wait_for_bootloader(adev);
1455		if (optional && !adev->bios)
1456			return 0;
1457
1458		ret = amdgpu_atomfirmware_asic_init(adev, true);
1459		return ret;
1460	} else {
1461		if (optional && !adev->bios)
1462			return 0;
1463
1464		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1465	}
1466
1467	return 0;
1468}
1469
1470/**
1471 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1472 *
1473 * @adev: amdgpu_device pointer
1474 *
1475 * Allocates a scratch page of VRAM for use by various things in the
1476 * driver.
1477 */
1478static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1479{
1480	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1481				       AMDGPU_GEM_DOMAIN_VRAM |
1482				       AMDGPU_GEM_DOMAIN_GTT,
1483				       &adev->mem_scratch.robj,
1484				       &adev->mem_scratch.gpu_addr,
1485				       (void **)&adev->mem_scratch.ptr);
1486}
1487
1488/**
1489 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1490 *
1491 * @adev: amdgpu_device pointer
1492 *
1493 * Frees the VRAM scratch page.
1494 */
1495static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1496{
1497	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1498}
1499
1500/**
1501 * amdgpu_device_program_register_sequence - program an array of registers.
1502 *
1503 * @adev: amdgpu_device pointer
1504 * @registers: pointer to the register array
1505 * @array_size: size of the register array
1506 *
1507 * Programs an array or registers with and or masks.
1508 * This is a helper for setting golden registers.
1509 */
1510void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1511					     const u32 *registers,
1512					     const u32 array_size)
1513{
1514	u32 tmp, reg, and_mask, or_mask;
1515	int i;
1516
1517	if (array_size % 3)
1518		return;
1519
1520	for (i = 0; i < array_size; i += 3) {
1521		reg = registers[i + 0];
1522		and_mask = registers[i + 1];
1523		or_mask = registers[i + 2];
1524
1525		if (and_mask == 0xffffffff) {
1526			tmp = or_mask;
1527		} else {
1528			tmp = RREG32(reg);
1529			tmp &= ~and_mask;
1530			if (adev->family >= AMDGPU_FAMILY_AI)
1531				tmp |= (or_mask & and_mask);
1532			else
1533				tmp |= or_mask;
1534		}
1535		WREG32(reg, tmp);
1536	}
1537}
1538
1539/**
1540 * amdgpu_device_pci_config_reset - reset the GPU
1541 *
1542 * @adev: amdgpu_device pointer
1543 *
1544 * Resets the GPU using the pci config reset sequence.
1545 * Only applicable to asics prior to vega10.
1546 */
1547void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1548{
1549	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1550}
1551
1552/**
1553 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1554 *
1555 * @adev: amdgpu_device pointer
1556 *
1557 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1558 */
1559int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1560{
1561	return pci_reset_function(adev->pdev);
1562}
1563
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
1569
1570/**
1571 * amdgpu_device_wb_fini - Disable Writeback and free memory
1572 *
1573 * @adev: amdgpu_device pointer
1574 *
1575 * Disables Writeback and frees the Writeback memory (all asics).
1576 * Used at driver shutdown.
1577 */
1578static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1579{
1580	if (adev->wb.wb_obj) {
1581		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1582				      &adev->wb.gpu_addr,
1583				      (void **)&adev->wb.wb);
1584		adev->wb.wb_obj = NULL;
1585	}
1586}
1587
1588/**
1589 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1590 *
1591 * @adev: amdgpu_device pointer
1592 *
1593 * Initializes writeback and allocates writeback memory (all asics).
1594 * Used at driver startup.
1595 * Returns 0 on success or an -error on failure.
1596 */
1597static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1598{
1599	int r;
1600
1601	if (adev->wb.wb_obj == NULL) {
1602		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1603		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1604					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1605					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1606					    (void **)&adev->wb.wb);
1607		if (r) {
1608			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1609			return r;
1610		}
1611
1612		adev->wb.num_wb = AMDGPU_MAX_WB;
1613		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1614
1615		/* clear wb memory */
1616		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1617	}
1618
1619	return 0;
1620}
1621
1622/**
1623 * amdgpu_device_wb_get - Allocate a wb entry
1624 *
1625 * @adev: amdgpu_device pointer
1626 * @wb: wb index
1627 *
1628 * Allocate a wb slot for use by the driver (all asics).
1629 * Returns 0 on success or -EINVAL on failure.
1630 */
1631int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1632{
1633	unsigned long flags, offset;
1634
1635	spin_lock_irqsave(&adev->wb.lock, flags);
1636	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1637	if (offset < adev->wb.num_wb) {
1638		__set_bit(offset, adev->wb.used);
1639		spin_unlock_irqrestore(&adev->wb.lock, flags);
1640		*wb = offset << 3; /* convert to dw offset */
1641		return 0;
1642	} else {
1643		spin_unlock_irqrestore(&adev->wb.lock, flags);
1644		return -EINVAL;
1645	}
1646}
1647
1648/**
1649 * amdgpu_device_wb_free - Free a wb entry
1650 *
1651 * @adev: amdgpu_device pointer
1652 * @wb: wb index
1653 *
1654 * Free a wb slot allocated for use by the driver (all asics)
1655 */
1656void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1657{
1658	unsigned long flags;
1659
1660	wb >>= 3;
1661	spin_lock_irqsave(&adev->wb.lock, flags);
1662	if (wb < adev->wb.num_wb)
1663		__clear_bit(wb, adev->wb.used);
1664	spin_unlock_irqrestore(&adev->wb.lock, flags);
1665}
1666
1667/**
1668 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1669 *
1670 * @adev: amdgpu_device pointer
1671 *
1672 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1673 * to fail, but if any of the BARs is not accessible after the size we abort
1674 * driver loading by returning -ENODEV.
1675 */
1676int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1677{
1678	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1679	struct pci_bus *root;
1680	struct resource *res;
1681	int max_size, r;
1682	unsigned int i;
1683	u16 cmd;
1684
1685	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1686		return 0;
1687
1688	/* Bypass for VF */
1689	if (amdgpu_sriov_vf(adev))
1690		return 0;
1691
1692	if (!amdgpu_rebar)
1693		return 0;
1694
1695	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1696	if ((amdgpu_runtime_pm != 0) &&
1697	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1698	    adev->pdev->device == 0x731f &&
1699	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1700		return 0;
1701
1702	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1703	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1704		dev_warn(
1705			adev->dev,
1706			"System can't access extended configuration space, please check!!\n");
1707
1708	/* skip if the bios has already enabled large BAR */
1709	if (adev->gmc.real_vram_size &&
1710	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1711		return 0;
1712
1713	/* Check if the root BUS has 64bit memory resources */
1714	root = adev->pdev->bus;
1715	while (root->parent)
1716		root = root->parent;
1717
1718	pci_bus_for_each_resource(root, res, i) {
1719		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1720		    res->start > 0x100000000ull)
1721			break;
1722	}
1723
1724	/* Trying to resize is pointless without a root hub window above 4GB */
1725	if (!res)
1726		return 0;
1727
1728	/* Limit the BAR size to what is available */
1729	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1730	if (max_size < 0)
1731		return 0;
1732	rbar_size = min(max_size, rbar_size);
1733
1734	/* Disable memory decoding while we change the BAR addresses and size */
1735	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1736	pci_write_config_word(adev->pdev, PCI_COMMAND,
1737			      cmd & ~PCI_COMMAND_MEMORY);
1738
1739	/* Tear down doorbell as resizing will release BARs */
1740	amdgpu_doorbell_fini(adev);
1741
1742	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1743				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1744								  : 1 << 2);
1745	if (r == -ENOSPC)
1746		dev_info(adev->dev,
1747			 "Not enough PCI address space for a large BAR.");
1748	else if (r && r != -ENOTSUPP)
1749		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1750
1751	/* When the doorbell or fb BAR isn't available we have no chance of
1752	 * using the device.
1753	 */
1754	r = amdgpu_doorbell_init(adev);
1755	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1756		return -ENODEV;
1757
1758	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1759
1760	return 0;
1761}
1762
/*
 * GPU helper functions.
 */
1766/**
1767 * amdgpu_device_need_post - check if the hw need post or not
1768 *
1769 * @adev: amdgpu_device pointer
1770 *
1771 * Check if the asic has been initialized (all asics) at driver startup
1772 * or post is needed if  hw reset is performed.
1773 * Returns true if need or false if not.
1774 */
1775bool amdgpu_device_need_post(struct amdgpu_device *adev)
1776{
1777	uint32_t reg, flags;
1778
1779	if (amdgpu_sriov_vf(adev))
1780		return false;
1781
1782	flags = amdgpu_device_get_vbios_flags(adev);
1783	if (flags & AMDGPU_VBIOS_SKIP)
1784		return false;
1785	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1786		return false;
1787
1788	if (amdgpu_passthrough(adev)) {
1789		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1790		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1791		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1792		 * vpost executed for smc version below 22.15
1793		 */
1794		if (adev->asic_type == CHIP_FIJI) {
1795			int err;
1796			uint32_t fw_ver;
1797
1798			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1799			/* force vPost if error occurred */
1800			if (err)
1801				return true;
1802
1803			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1804			release_firmware(adev->pm.fw);
1805			if (fw_ver < 0x00160e00)
1806				return true;
1807		}
1808	}
1809
1810	/* Don't post if we need to reset whole hive on init */
1811	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1812		return false;
1813
1814	if (adev->has_hw_reset) {
1815		adev->has_hw_reset = false;
1816		return true;
1817	}
1818
1819	/* bios scratch used on CIK+ */
1820	if (adev->asic_type >= CHIP_BONAIRE)
1821		return amdgpu_atombios_scratch_need_asic_init(adev);
1822
1823	/* check MEM_SIZE for older asics */
1824	reg = amdgpu_asic_get_config_memsize(adev);
1825
1826	if ((reg != 0) && (reg != 0xffffffff))
1827		return false;
1828
1829	return true;
1830}
1831
1832/*
1833 * Check whether seamless boot is supported.
1834 *
1835 * So far we only support seamless boot on DCE 3.0 or later.
1836 * If users report that it works on older ASICS as well, we may
1837 * loosen this.
1838 */
1839bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1840{
1841	switch (amdgpu_seamless) {
1842	case -1:
1843		break;
1844	case 1:
1845		return true;
1846	case 0:
1847		return false;
1848	default:
1849		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1850			amdgpu_seamless);
1851		return false;
1852	}
1853
1854	if (!(adev->flags & AMD_IS_APU))
1855		return false;
1856
1857	if (adev->mman.keep_stolen_vga_memory)
1858		return false;
1859
1860	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1861}
1862
1863/*
1864 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1865 * don't support dynamic speed switching. Until we have confirmation from Intel
1866 * that a specific host supports it, it's safer that we keep it disabled for all.
1867 *
1868 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1869 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1870 */
1871static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1872{
1873#if IS_ENABLED(CONFIG_X86)
1874	struct cpuinfo_x86 *c = &cpu_data(0);
1875
1876	/* eGPU change speeds based on USB4 fabric conditions */
1877	if (dev_is_removable(adev->dev))
1878		return true;
1879
1880	if (c->x86_vendor == X86_VENDOR_INTEL)
1881		return false;
1882#endif
1883	return true;
1884}
1885
1886static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1887{
1888	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1889	 * It's unclear if this is a platform-specific or GPU-specific issue.
1890	 * Disable ASPM on SI for the time being.
1891	 */
1892	if (adev->family == AMDGPU_FAMILY_SI)
1893		return true;
1894
1895#if IS_ENABLED(CONFIG_X86)
1896	struct cpuinfo_x86 *c = &cpu_data(0);
1897
1898	if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
1899		  amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
1900		return false;
1901
1902	if (c->x86 == 6 &&
1903		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1904		switch (c->x86_model) {
1905		case VFM_MODEL(INTEL_ALDERLAKE):
1906		case VFM_MODEL(INTEL_ALDERLAKE_L):
1907		case VFM_MODEL(INTEL_RAPTORLAKE):
1908		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1909		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1910			return true;
1911		default:
1912			return false;
1913		}
1914	} else {
1915		return false;
1916	}
1917#else
1918	return false;
1919#endif
1920}
1921
1922/**
1923 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1924 *
1925 * @adev: amdgpu_device pointer
1926 *
1927 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1928 * be set for this device.
1929 *
1930 * Returns true if it should be used or false if not.
1931 */
1932bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1933{
1934	switch (amdgpu_aspm) {
1935	case -1:
1936		break;
1937	case 0:
1938		return false;
1939	case 1:
1940		return true;
1941	default:
1942		return false;
1943	}
1944	if (adev->flags & AMD_IS_APU)
1945		return false;
1946	if (amdgpu_device_aspm_support_quirk(adev))
1947		return false;
1948	return pcie_aspm_enabled(adev->pdev);
1949}
1950
1951/* if we get transitioned to only one device, take VGA back */
1952/**
1953 * amdgpu_device_vga_set_decode - enable/disable vga decode
1954 *
1955 * @pdev: PCI device pointer
1956 * @state: enable/disable vga decode
1957 *
1958 * Enable/disable vga decode (all asics).
1959 * Returns VGA resource flags.
1960 */
1961static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1962		bool state)
1963{
1964	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1965
1966	amdgpu_asic_set_vga_state(adev, state);
1967	if (state)
1968		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1969		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1970	else
1971		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1972}
1973
1974/**
1975 * amdgpu_device_check_block_size - validate the vm block size
1976 *
1977 * @adev: amdgpu_device pointer
1978 *
1979 * Validates the vm block size specified via module parameter.
1980 * The vm block size defines number of bits in page table versus page directory,
1981 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1982 * page table and the remaining bits are in the page directory.
1983 */
1984static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1985{
1986	/* defines number of bits in page table versus page directory,
1987	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1988	 * page table and the remaining bits are in the page directory
1989	 */
1990	if (amdgpu_vm_block_size == -1)
1991		return;
1992
1993	if (amdgpu_vm_block_size < 9) {
1994		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1995			 amdgpu_vm_block_size);
1996		amdgpu_vm_block_size = -1;
1997	}
1998}
1999
2000/**
2001 * amdgpu_device_check_vm_size - validate the vm size
2002 *
2003 * @adev: amdgpu_device pointer
2004 *
2005 * Validates the vm size in GB specified via module parameter.
2006 * The VM size is the size of the GPU virtual memory space in GB.
2007 */
2008static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
2009{
2010	/* no need to check the default value */
2011	if (amdgpu_vm_size == -1)
2012		return;
2013
2014	if (amdgpu_vm_size < 1) {
2015		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
2016			 amdgpu_vm_size);
2017		amdgpu_vm_size = -1;
2018	}
2019}
2020
2021static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
2022{
2023	struct sysinfo si;
2024	bool is_os_64 = (sizeof(void *) == 8);
2025	uint64_t total_memory;
2026	uint64_t dram_size_seven_GB = 0x1B8000000;
2027	uint64_t dram_size_three_GB = 0xB8000000;
2028
2029	if (amdgpu_smu_memory_pool_size == 0)
2030		return;
2031
2032	if (!is_os_64) {
2033		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
2034		goto def_value;
2035	}
2036	si_meminfo(&si);
2037	total_memory = (uint64_t)si.totalram * si.mem_unit;
2038
2039	if ((amdgpu_smu_memory_pool_size == 1) ||
2040		(amdgpu_smu_memory_pool_size == 2)) {
2041		if (total_memory < dram_size_three_GB)
2042			goto def_value1;
2043	} else if ((amdgpu_smu_memory_pool_size == 4) ||
2044		(amdgpu_smu_memory_pool_size == 8)) {
2045		if (total_memory < dram_size_seven_GB)
2046			goto def_value1;
2047	} else {
2048		dev_warn(adev->dev, "Smu memory pool size not supported\n");
2049		goto def_value;
2050	}
2051	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
2052
2053	return;
2054
2055def_value1:
2056	dev_warn(adev->dev, "No enough system memory\n");
2057def_value:
2058	adev->pm.smu_prv_buffer_size = 0;
2059}
2060
2061static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
2062{
2063	if (!(adev->flags & AMD_IS_APU) ||
2064	    adev->asic_type < CHIP_RAVEN)
2065		return 0;
2066
2067	switch (adev->asic_type) {
2068	case CHIP_RAVEN:
2069		if (adev->pdev->device == 0x15dd)
2070			adev->apu_flags |= AMD_APU_IS_RAVEN;
2071		if (adev->pdev->device == 0x15d8)
2072			adev->apu_flags |= AMD_APU_IS_PICASSO;
2073		break;
2074	case CHIP_RENOIR:
2075		if ((adev->pdev->device == 0x1636) ||
2076		    (adev->pdev->device == 0x164c))
2077			adev->apu_flags |= AMD_APU_IS_RENOIR;
2078		else
2079			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
2080		break;
2081	case CHIP_VANGOGH:
2082		adev->apu_flags |= AMD_APU_IS_VANGOGH;
2083		break;
2084	case CHIP_YELLOW_CARP:
2085		break;
2086	case CHIP_CYAN_SKILLFISH:
2087		if ((adev->pdev->device == 0x13FE) ||
2088		    (adev->pdev->device == 0x143F))
2089			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
2090		break;
2091	default:
2092		break;
2093	}
2094
2095	return 0;
2096}
2097
2098/**
2099 * amdgpu_device_check_arguments - validate module params
2100 *
2101 * @adev: amdgpu_device pointer
2102 *
2103 * Validates certain module parameters and updates
2104 * the associated values used by the driver (all asics).
2105 */
2106static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2107{
2108	int i;
2109
2110	if (amdgpu_sched_jobs < 4) {
2111		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2112			 amdgpu_sched_jobs);
2113		amdgpu_sched_jobs = 4;
2114	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
2115		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2116			 amdgpu_sched_jobs);
2117		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2118	}
2119
2120	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
2121		/* gart size must be greater or equal to 32M */
2122		dev_warn(adev->dev, "gart size (%d) too small\n",
2123			 amdgpu_gart_size);
2124		amdgpu_gart_size = -1;
2125	}
2126
2127	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
2128		/* gtt size must be greater or equal to 32M */
2129		dev_warn(adev->dev, "gtt size (%d) too small\n",
2130				 amdgpu_gtt_size);
2131		amdgpu_gtt_size = -1;
2132	}
2133
2134	/* valid range is between 4 and 9 inclusive */
2135	if (amdgpu_vm_fragment_size != -1 &&
2136	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
2137		dev_warn(adev->dev, "valid range is between 4 and 9\n");
2138		amdgpu_vm_fragment_size = -1;
2139	}
2140
2141	if (amdgpu_sched_hw_submission < 2) {
2142		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2143			 amdgpu_sched_hw_submission);
2144		amdgpu_sched_hw_submission = 2;
2145	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2146		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2147			 amdgpu_sched_hw_submission);
2148		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2149	}
2150
2151	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2152		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2153		amdgpu_reset_method = -1;
2154	}
2155
2156	amdgpu_device_check_smu_prv_buffer_size(adev);
2157
2158	amdgpu_device_check_vm_size(adev);
2159
2160	amdgpu_device_check_block_size(adev);
2161
2162	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2163
2164	for (i = 0; i < MAX_XCP; i++) {
2165		switch (amdgpu_enforce_isolation) {
2166		case -1:
2167		case 0:
2168		default:
2169			/* disable */
2170			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
2171			break;
2172		case 1:
2173			/* enable */
2174			adev->enforce_isolation[i] =
2175				AMDGPU_ENFORCE_ISOLATION_ENABLE;
2176			break;
2177		case 2:
2178			/* enable legacy mode */
2179			adev->enforce_isolation[i] =
2180				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
2181			break;
2182		case 3:
2183			/* enable only process isolation without submitting cleaner shader */
2184			adev->enforce_isolation[i] =
2185				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
2186			break;
2187		}
2188	}
2189
2190	return 0;
2191}
2192
2193/**
2194 * amdgpu_switcheroo_set_state - set switcheroo state
2195 *
2196 * @pdev: pci dev pointer
2197 * @state: vga_switcheroo state
2198 *
2199 * Callback for the switcheroo driver.  Suspends or resumes
2200 * the asics before or after it is powered up using ACPI methods.
2201 */
2202static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2203					enum vga_switcheroo_state state)
2204{
2205	struct drm_device *dev = pci_get_drvdata(pdev);
2206	int r;
2207
2208	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
2209	    state == VGA_SWITCHEROO_OFF)
2210		return;
2211
2212	if (state == VGA_SWITCHEROO_ON) {
2213		pr_info("switched on\n");
2214		/* don't suspend or resume card normally */
2215		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2216
2217		pci_set_power_state(pdev, PCI_D0);
2218		amdgpu_device_load_pci_state(pdev);
2219		r = pci_enable_device(pdev);
2220		if (r)
2221			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
2222				 r);
2223		amdgpu_device_resume(dev, true);
2224
2225		dev->switch_power_state = DRM_SWITCH_POWER_ON;
2226	} else {
2227		dev_info(&pdev->dev, "switched off\n");
2228		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2229		amdgpu_device_prepare(dev);
2230		amdgpu_device_suspend(dev, true);
2231		amdgpu_device_cache_pci_state(pdev);
2232		/* Shut down the device */
2233		pci_disable_device(pdev);
2234		pci_set_power_state(pdev, PCI_D3cold);
2235		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2236	}
2237}
2238
2239/**
2240 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2241 *
2242 * @pdev: pci dev pointer
2243 *
2244 * Callback for the switcheroo driver.  Check of the switcheroo
2245 * state can be changed.
2246 * Returns true if the state can be changed, false if not.
2247 */
2248static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2249{
2250	struct drm_device *dev = pci_get_drvdata(pdev);
2251
2252       /*
2253	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2254	* locking inversion with the driver load path. And the access here is
2255	* completely racy anyway. So don't bother with locking for now.
2256	*/
2257	return atomic_read(&dev->open_count) == 0;
2258}
2259
2260static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2261	.set_gpu_state = amdgpu_switcheroo_set_state,
2262	.reprobe = NULL,
2263	.can_switch = amdgpu_switcheroo_can_switch,
2264};
2265
2266/**
2267 * amdgpu_device_ip_set_clockgating_state - set the CG state
2268 *
2269 * @dev: amdgpu_device pointer
2270 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2271 * @state: clockgating state (gate or ungate)
2272 *
2273 * Sets the requested clockgating state for all instances of
2274 * the hardware IP specified.
2275 * Returns the error code from the last instance.
2276 */
2277int amdgpu_device_ip_set_clockgating_state(void *dev,
2278					   enum amd_ip_block_type block_type,
2279					   enum amd_clockgating_state state)
2280{
2281	struct amdgpu_device *adev = dev;
2282	int i, r = 0;
2283
2284	for (i = 0; i < adev->num_ip_blocks; i++) {
2285		if (!adev->ip_blocks[i].status.valid)
2286			continue;
2287		if (adev->ip_blocks[i].version->type != block_type)
2288			continue;
2289		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2290			continue;
2291		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2292			&adev->ip_blocks[i], state);
2293		if (r)
2294			dev_err(adev->dev,
2295				"set_clockgating_state of IP block <%s> failed %d\n",
2296				adev->ip_blocks[i].version->funcs->name, r);
2297	}
2298	return r;
2299}
2300
2301/**
2302 * amdgpu_device_ip_set_powergating_state - set the PG state
2303 *
2304 * @dev: amdgpu_device pointer
2305 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2306 * @state: powergating state (gate or ungate)
2307 *
2308 * Sets the requested powergating state for all instances of
2309 * the hardware IP specified.
2310 * Returns the error code from the last instance.
2311 */
2312int amdgpu_device_ip_set_powergating_state(void *dev,
2313					   enum amd_ip_block_type block_type,
2314					   enum amd_powergating_state state)
2315{
2316	struct amdgpu_device *adev = dev;
2317	int i, r = 0;
2318
2319	for (i = 0; i < adev->num_ip_blocks; i++) {
2320		if (!adev->ip_blocks[i].status.valid)
2321			continue;
2322		if (adev->ip_blocks[i].version->type != block_type)
2323			continue;
2324		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2325			continue;
2326		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2327			&adev->ip_blocks[i], state);
2328		if (r)
2329			dev_err(adev->dev,
2330				"set_powergating_state of IP block <%s> failed %d\n",
2331				adev->ip_blocks[i].version->funcs->name, r);
2332	}
2333	return r;
2334}
2335
2336/**
2337 * amdgpu_device_ip_get_clockgating_state - get the CG state
2338 *
2339 * @adev: amdgpu_device pointer
2340 * @flags: clockgating feature flags
2341 *
2342 * Walks the list of IPs on the device and updates the clockgating
2343 * flags for each IP.
2344 * Updates @flags with the feature flags for each hardware IP where
2345 * clockgating is enabled.
2346 */
2347void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2348					    u64 *flags)
2349{
2350	int i;
2351
2352	for (i = 0; i < adev->num_ip_blocks; i++) {
2353		if (!adev->ip_blocks[i].status.valid)
2354			continue;
2355		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2356			adev->ip_blocks[i].version->funcs->get_clockgating_state(
2357				&adev->ip_blocks[i], flags);
2358	}
2359}
2360
2361/**
2362 * amdgpu_device_ip_wait_for_idle - wait for idle
2363 *
2364 * @adev: amdgpu_device pointer
2365 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2366 *
2367 * Waits for the request hardware IP to be idle.
2368 * Returns 0 for success or a negative error code on failure.
2369 */
2370int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2371				   enum amd_ip_block_type block_type)
2372{
2373	int i, r;
2374
2375	for (i = 0; i < adev->num_ip_blocks; i++) {
2376		if (!adev->ip_blocks[i].status.valid)
2377			continue;
2378		if (adev->ip_blocks[i].version->type == block_type) {
2379			if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2380				r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2381								&adev->ip_blocks[i]);
2382				if (r)
2383					return r;
2384			}
2385			break;
2386		}
2387	}
2388	return 0;
2389
2390}
2391
2392/**
2393 * amdgpu_device_ip_is_hw - is the hardware IP enabled
2394 *
2395 * @adev: amdgpu_device pointer
2396 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2397 *
2398 * Check if the hardware IP is enable or not.
2399 * Returns true if it the IP is enable, false if not.
2400 */
2401bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
2402			    enum amd_ip_block_type block_type)
2403{
2404	int i;
2405
2406	for (i = 0; i < adev->num_ip_blocks; i++) {
2407		if (adev->ip_blocks[i].version->type == block_type)
2408			return adev->ip_blocks[i].status.hw;
2409	}
2410	return false;
2411}
2412
2413/**
2414 * amdgpu_device_ip_is_valid - is the hardware IP valid
2415 *
2416 * @adev: amdgpu_device pointer
2417 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2418 *
2419 * Check if the hardware IP is valid or not.
2420 * Returns true if it the IP is valid, false if not.
2421 */
2422bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2423			       enum amd_ip_block_type block_type)
2424{
2425	int i;
2426
2427	for (i = 0; i < adev->num_ip_blocks; i++) {
2428		if (adev->ip_blocks[i].version->type == block_type)
2429			return adev->ip_blocks[i].status.valid;
2430	}
2431	return false;
2432
2433}
2434
2435/**
2436 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2437 *
2438 * @adev: amdgpu_device pointer
2439 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2440 *
2441 * Returns a pointer to the hardware IP block structure
2442 * if it exists for the asic, otherwise NULL.
2443 */
2444struct amdgpu_ip_block *
2445amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2446			      enum amd_ip_block_type type)
2447{
2448	int i;
2449
2450	for (i = 0; i < adev->num_ip_blocks; i++)
2451		if (adev->ip_blocks[i].version->type == type)
2452			return &adev->ip_blocks[i];
2453
2454	return NULL;
2455}
2456
2457/**
2458 * amdgpu_device_ip_block_version_cmp
2459 *
2460 * @adev: amdgpu_device pointer
2461 * @type: enum amd_ip_block_type
2462 * @major: major version
2463 * @minor: minor version
2464 *
2465 * return 0 if equal or greater
2466 * return 1 if smaller or the ip_block doesn't exist
2467 */
2468int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2469				       enum amd_ip_block_type type,
2470				       u32 major, u32 minor)
2471{
2472	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2473
2474	if (ip_block && ((ip_block->version->major > major) ||
2475			((ip_block->version->major == major) &&
2476			(ip_block->version->minor >= minor))))
2477		return 0;
2478
2479	return 1;
2480}
2481
/*
 * Short names for the IP block types, indexed by enum amd_ip_block_type.
 * Slots without an initializer here are NULL.
 */
static const char *ip_block_names[] = {
	[AMD_IP_BLOCK_TYPE_COMMON] = "common",
	[AMD_IP_BLOCK_TYPE_GMC] = "gmc",
	[AMD_IP_BLOCK_TYPE_IH] = "ih",
	[AMD_IP_BLOCK_TYPE_SMC] = "smu",
	[AMD_IP_BLOCK_TYPE_PSP] = "psp",
	[AMD_IP_BLOCK_TYPE_DCE] = "dce",
	[AMD_IP_BLOCK_TYPE_GFX] = "gfx",
	[AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
	[AMD_IP_BLOCK_TYPE_UVD] = "uvd",
	[AMD_IP_BLOCK_TYPE_VCE] = "vce",
	[AMD_IP_BLOCK_TYPE_ACP] = "acp",
	[AMD_IP_BLOCK_TYPE_VCN] = "vcn",
	[AMD_IP_BLOCK_TYPE_MES] = "mes",
	[AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
	[AMD_IP_BLOCK_TYPE_VPE] = "vpe",
	[AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
	[AMD_IP_BLOCK_TYPE_ISP] = "isp",
	[AMD_IP_BLOCK_TYPE_RAS] = "ras",
};
2502
2503static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
2504{
2505	int idx = (int)type;
2506
2507	return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
2508}
2509
2510/**
2511 * amdgpu_device_ip_block_add
2512 *
2513 * @adev: amdgpu_device pointer
2514 * @ip_block_version: pointer to the IP to add
2515 *
2516 * Adds the IP block driver information to the collection of IPs
2517 * on the asic.
2518 */
2519int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2520			       const struct amdgpu_ip_block_version *ip_block_version)
2521{
2522	if (!ip_block_version)
2523		return -EINVAL;
2524
2525	switch (ip_block_version->type) {
2526	case AMD_IP_BLOCK_TYPE_VCN:
2527		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2528			return 0;
2529		break;
2530	case AMD_IP_BLOCK_TYPE_JPEG:
2531		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2532			return 0;
2533		break;
2534	default:
2535		break;
2536	}
2537
2538	dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
2539		 adev->num_ip_blocks,
2540		 ip_block_name(adev, ip_block_version->type),
2541		 ip_block_version->major,
2542		 ip_block_version->minor,
2543		 ip_block_version->rev,
2544		 ip_block_version->funcs->name);
2545
2546	adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2547
2548	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2549
2550	return 0;
2551}
2552
2553/**
2554 * amdgpu_device_enable_virtual_display - enable virtual display feature
2555 *
2556 * @adev: amdgpu_device pointer
2557 *
2558 * Enabled the virtual display feature if the user has enabled it via
2559 * the module parameter virtual_display.  This feature provides a virtual
2560 * display hardware on headless boards or in virtualized environments.
2561 * This function parses and validates the configuration string specified by
2562 * the user and configures the virtual display configuration (number of
2563 * virtual connectors, crtcs, etc.) specified.
2564 */
2565static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2566{
2567	adev->enable_virtual_display = false;
2568
2569	if (amdgpu_virtual_display) {
2570		const char *pci_address_name = pci_name(adev->pdev);
2571		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2572
2573		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2574		pciaddstr_tmp = pciaddstr;
2575		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2576			pciaddname = strsep(&pciaddname_tmp, ",");
2577			if (!strcmp("all", pciaddname)
2578			    || !strcmp(pci_address_name, pciaddname)) {
2579				long num_crtc;
2580				int res = -1;
2581
2582				adev->enable_virtual_display = true;
2583
2584				if (pciaddname_tmp)
2585					res = kstrtol(pciaddname_tmp, 10,
2586						      &num_crtc);
2587
2588				if (!res) {
2589					if (num_crtc < 1)
2590						num_crtc = 1;
2591					if (num_crtc > 6)
2592						num_crtc = 6;
2593					adev->mode_info.num_crtc = num_crtc;
2594				} else {
2595					adev->mode_info.num_crtc = 1;
2596				}
2597				break;
2598			}
2599		}
2600
2601		dev_info(
2602			adev->dev,
2603			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2604			amdgpu_virtual_display, pci_address_name,
2605			adev->enable_virtual_display, adev->mode_info.num_crtc);
2606
2607		kfree(pciaddstr);
2608	}
2609}
2610
2611void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2612{
2613	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2614		adev->mode_info.num_crtc = 1;
2615		adev->enable_virtual_display = true;
2616		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
2617			 adev->enable_virtual_display,
2618			 adev->mode_info.num_crtc);
2619	}
2620}
2621
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.  Only the asic types listed in the first switch below use a
 * gpu info firmware; for all others this is a no-op.
 * Returns 0 on success or when no gpu info firmware is needed, the error
 * from amdgpu_ucode_request() if the firmware cannot be obtained, or
 * -EINVAL if the table's major version is not supported.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	/* pick the firmware name, or bail out if this asic doesn't use one */
	switch (adev->asic_type) {
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		/* nothing to load when the discovery binary is present */
		if (adev->discovery.bin)
			return 0;
		chip_name = "navi12";
		break;
	case CHIP_CYAN_SKILLFISH:
		/* nothing to load when the discovery binary is present */
		if (adev->discovery.bin)
			return 0;
		chip_name = "cyan_skillfish";
		break;
	}

	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
				   AMDGPU_UCODE_OPTIONAL,
				   "amdgpu/%s_gpu_info.bin", chip_name);
	if (err) {
		dev_err(adev->dev,
			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
			chip_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		/* the payload starts ucode_array_offset_bytes into the image */
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/*
		 * Navi12 only takes the soc bounding box from this firmware
		 * and skips the gfx configuration below.
		 * Should be dropped when DAL no longer needs it.
		 */
		if (adev->asic_type == CHIP_NAVI12)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		/* v1.1 and later tables carry two more gfx config fields */
		if (hdr->version_minor >= 1) {
			/* same payload, viewed through the v1_1 layout (shadows the outer pointer) */
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 */
		if (hdr->version_minor == 2) {
			/* same payload, viewed through the v1_2 layout */
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			/* points into the firmware image, no copy is made */
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		/* NOTE(review): the switch is on version_major but ucode_version is printed - confirm intent */
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}
2748
2749static void amdgpu_uid_init(struct amdgpu_device *adev)
2750{
2751	/* Initialize the UID for the device */
2752	adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
2753	if (!adev->uid_info) {
2754		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
2755		return;
2756	}
2757	adev->uid_info->adev = adev;
2758}
2759
2760static void amdgpu_uid_fini(struct amdgpu_device *adev)
2761{
2762	/* Free the UID memory */
2763	kfree(adev->uid_info);
2764	adev->uid_info = NULL;
2765}
2766
2767/**
2768 * amdgpu_device_ip_early_init - run early init for hardware IPs
2769 *
2770 * @adev: amdgpu_device pointer
2771 *
2772 * Early initialization pass for hardware IPs.  The hardware IPs that make
2773 * up each asic are discovered each IP's early_init callback is run.  This
2774 * is the first stage in initializing the asic.
2775 * Returns 0 on success, negative error code on failure.
2776 */
2777static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2778{
2779	struct amdgpu_ip_block *ip_block;
2780	struct pci_dev *parent;
2781	bool total, skip_bios;
2782	uint32_t bios_flags;
2783	int i, r;
2784
2785	amdgpu_device_enable_virtual_display(adev);
2786
2787	if (amdgpu_sriov_vf(adev)) {
2788		r = amdgpu_virt_request_full_gpu(adev, true);
2789		if (r)
2790			return r;
2791
2792		r = amdgpu_virt_init_critical_region(adev);
2793		if (r)
2794			return r;
2795	}
2796
2797	switch (adev->asic_type) {
2798#ifdef CONFIG_DRM_AMDGPU_SI
2799	case CHIP_VERDE:
2800	case CHIP_TAHITI:
2801	case CHIP_PITCAIRN:
2802	case CHIP_OLAND:
2803	case CHIP_HAINAN:
2804		adev->family = AMDGPU_FAMILY_SI;
2805		r = si_set_ip_blocks(adev);
2806		if (r)
2807			return r;
2808		break;
2809#endif
2810#ifdef CONFIG_DRM_AMDGPU_CIK
2811	case CHIP_BONAIRE:
2812	case CHIP_HAWAII:
2813	case CHIP_KAVERI:
2814	case CHIP_KABINI:
2815	case CHIP_MULLINS:
2816		if (adev->flags & AMD_IS_APU)
2817			adev->family = AMDGPU_FAMILY_KV;
2818		else
2819			adev->family = AMDGPU_FAMILY_CI;
2820
2821		r = cik_set_ip_blocks(adev);
2822		if (r)
2823			return r;
2824		break;
2825#endif
2826	case CHIP_TOPAZ:
2827	case CHIP_TONGA:
2828	case CHIP_FIJI:
2829	case CHIP_POLARIS10:
2830	case CHIP_POLARIS11:
2831	case CHIP_POLARIS12:
2832	case CHIP_VEGAM:
2833	case CHIP_CARRIZO:
2834	case CHIP_STONEY:
2835		if (adev->flags & AMD_IS_APU)
2836			adev->family = AMDGPU_FAMILY_CZ;
2837		else
2838			adev->family = AMDGPU_FAMILY_VI;
2839
2840		r = vi_set_ip_blocks(adev);
2841		if (r)
2842			return r;
2843		break;
2844	default:
2845		r = amdgpu_discovery_set_ip_blocks(adev);
2846		if (r)
2847			return r;
2848		break;
2849	}
2850
2851	/* Check for IP version 9.4.3 with A0 hardware */
2852	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2853	    !amdgpu_device_get_rev_id(adev)) {
2854		dev_err(adev->dev, "Unsupported A0 hardware\n");
2855		return -ENODEV;	/* device unsupported - no device error */
2856	}
2857
2858	if (amdgpu_has_atpx() &&
2859	    (amdgpu_is_atpx_hybrid() ||
2860	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2861	    ((adev->flags & AMD_IS_APU) == 0) &&
2862	    !dev_is_removable(&adev->pdev->dev))
2863		adev->flags |= AMD_IS_PX;
2864
2865	if (!(adev->flags & AMD_IS_APU)) {
2866		parent = pcie_find_root_port(adev->pdev);
2867		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2868	}
2869
2870	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2871	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2872		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2873	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2874		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2875	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2876		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2877
2878	adev->virt.is_xgmi_node_migrate_enabled = false;
2879	if (amdgpu_sriov_vf(adev)) {
2880		adev->virt.is_xgmi_node_migrate_enabled =
2881			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2882	}
2883
2884	total = true;
2885	for (i = 0; i < adev->num_ip_blocks; i++) {
2886		ip_block = &adev->ip_blocks[i];
2887
2888		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2889			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2890				 adev->ip_blocks[i].version->funcs->name);
2891			adev->ip_blocks[i].status.valid = false;
2892		} else if (ip_block->version->funcs->early_init) {
2893			r = ip_block->version->funcs->early_init(ip_block);
2894			if (r == -ENOENT) {
2895				adev->ip_blocks[i].status.valid = false;
2896			} else if (r) {
2897				dev_err(adev->dev,
2898					"early_init of IP block <%s> failed %d\n",
2899					adev->ip_blocks[i].version->funcs->name,
2900					r);
2901				total = false;
2902			} else {
2903				adev->ip_blocks[i].status.valid = true;
2904			}
2905		} else {
2906			adev->ip_blocks[i].status.valid = true;
2907		}
2908		/* get the vbios after the asic_funcs are set up */
2909		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2910			r = amdgpu_device_parse_gpu_info_fw(adev);
2911			if (r)
2912				return r;
2913
2914			bios_flags = amdgpu_device_get_vbios_flags(adev);
2915			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2916			/* Read BIOS */
2917			if (!skip_bios) {
2918				bool optional =
2919					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2920				if (!amdgpu_get_bios(adev) && !optional)
2921					return -EINVAL;
2922
2923				if (optional && !adev->bios)
2924					dev_info(
2925						adev->dev,
2926						"VBIOS image optional, proceeding without VBIOS image");
2927
2928				if (adev->bios) {
2929					r = amdgpu_atombios_init(adev);
2930					if (r) {
2931						dev_err(adev->dev,
2932							"amdgpu_atombios_init failed\n");
2933						amdgpu_vf_error_put(
2934							adev,
2935							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2936							0, 0);
2937						return r;
2938					}
2939				}
2940			}
2941
2942			/*get pf2vf msg info at it's earliest time*/
2943			if (amdgpu_sriov_vf(adev))
2944				amdgpu_virt_init_data_exchange(adev);
2945
2946		}
2947	}
2948	if (!total)
2949		return -ENODEV;
2950
2951	if (adev->gmc.xgmi.supported)
2952		amdgpu_xgmi_early_init(adev);
2953
2954	if (amdgpu_is_multi_aid(adev))
2955		amdgpu_uid_init(adev);
2956	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2957	if (ip_block->status.valid != false)
2958		amdgpu_amdkfd_device_probe(adev);
2959
2960	adev->cg_flags &= amdgpu_cg_mask;
2961	adev->pg_flags &= amdgpu_pg_mask;
2962
2963	return 0;
2964}
2965
2966static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2967{
2968	int i, r;
2969
2970	for (i = 0; i < adev->num_ip_blocks; i++) {
2971		if (!adev->ip_blocks[i].status.sw)
2972			continue;
2973		if (adev->ip_blocks[i].status.hw)
2974			continue;
2975		if (!amdgpu_ip_member_of_hwini(
2976			    adev, adev->ip_blocks[i].version->type))
2977			continue;
2978		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2979		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2980		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2981			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2982			if (r) {
2983				dev_err(adev->dev,
2984					"hw_init of IP block <%s> failed %d\n",
2985					adev->ip_blocks[i].version->funcs->name,
2986					r);
2987				return r;
2988			}
2989			adev->ip_blocks[i].status.hw = true;
2990		}
2991	}
2992
2993	return 0;
2994}
2995
2996static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2997{
2998	int i, r;
2999
3000	for (i = 0; i < adev->num_ip_blocks; i++) {
3001		if (!adev->ip_blocks[i].status.sw)
3002			continue;
3003		if (adev->ip_blocks[i].status.hw)
3004			continue;
3005		if (!amdgpu_ip_member_of_hwini(
3006			    adev, adev->ip_blocks[i].version->type))
3007			continue;
3008		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3009		if (r) {
3010			dev_err(adev->dev,
3011				"hw_init of IP block <%s> failed %d\n",
3012				adev->ip_blocks[i].version->funcs->name, r);
3013			return r;
3014		}
3015		adev->ip_blocks[i].status.hw = true;
3016	}
3017
3018	return 0;
3019}
3020
/*
 * Bring up the PSP block (asics from CHIP_VEGA10 on) and then load the
 * SMU firmware.
 *
 * The PSP block is resumed when the device is in reset or in suspend and
 * hw-initialized otherwise.  The SMU firmware load is skipped for SR-IOV
 * virtual functions, except on CHIP_TONGA.
 * Returns 0 on success or a negative error code from the PSP bring-up or
 * the SMU firmware load.
 */
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* PSP fails the hwini membership check: stop looking */
			if (!amdgpu_ip_member_of_hwini(adev,
						       AMD_IP_BLOCK_TYPE_PSP))
				break;

			/* a PSP block without sw init is skipped; keep searching */
			if (!adev->ip_blocks[i].status.sw)
				continue;

			/* no need to do the fw loading again if already done*/
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (amdgpu_in_reset(adev) || adev->in_suspend) {
				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
				if (r)
					return r;
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
				if (r) {
					dev_err(adev->dev,
						"hw_init of IP block <%s> failed %d\n",
						adev->ip_blocks[i]
							.version->funcs->name,
						r);
					return r;
				}
				/* status.hw is set here only on the hw_init path */
				adev->ip_blocks[i].status.hw = true;
			}
			/* only the first usable PSP block is handled */
			break;
		}
	}

	/* smu_version is filled through the pointer but not used afterwards */
	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}
3068
3069static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
3070{
3071	struct drm_sched_init_args args = {
3072		.ops = &amdgpu_sched_ops,
3073		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
3074		.timeout_wq = adev->reset_domain->wq,
3075		.dev = adev->dev,
3076	};
3077	long timeout;
3078	int r, i;
3079
3080	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3081		struct amdgpu_ring *ring = adev->rings[i];
3082
3083		/* No need to setup the GPU scheduler for rings that don't need it */
3084		if (!ring || ring->no_scheduler)
3085			continue;
3086
3087		switch (ring->funcs->type) {
3088		case AMDGPU_RING_TYPE_GFX:
3089			timeout = adev->gfx_timeout;
3090			break;
3091		case AMDGPU_RING_TYPE_COMPUTE:
3092			timeout = adev->compute_timeout;
3093			break;
3094		case AMDGPU_RING_TYPE_SDMA:
3095			timeout = adev->sdma_timeout;
3096			break;
3097		default:
3098			timeout = adev->video_timeout;
3099			break;
3100		}
3101
3102		args.timeout = timeout;
3103		args.credit_limit = ring->num_hw_submission;
3104		args.score = ring->sched_score;
3105		args.name = ring->name;
3106
3107		r = drm_sched_init(&ring->sched, &args);
3108		if (r) {
3109			dev_err(adev->dev,
3110				"Failed to create scheduler on ring %s.\n",
3111				ring->name);
3112			return r;
3113		}
3114		r = amdgpu_uvd_entity_init(adev, ring);
3115		if (r) {
3116			dev_err(adev->dev,
3117				"Failed to create UVD scheduling entity on ring %s.\n",
3118				ring->name);
3119			return r;
3120		}
3121		r = amdgpu_vce_entity_init(adev, ring);
3122		if (r) {
3123			dev_err(adev->dev,
3124				"Failed to create VCE scheduling entity on ring %s.\n",
3125				ring->name);
3126			return r;
3127		}
3128	}
3129
3130	if (adev->xcp_mgr)
3131		amdgpu_xcp_update_partition_sched_list(adev);
3132
3133	return 0;
3134}
3135
3136
/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run.  sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 *
 * The COMMON and GMC blocks get their hw_init inside the sw_init loop; the
 * remaining blocks are brought up by hw_init phase1, firmware loading and
 * hw_init phase2, in that order.
 *
 * Every failure after amdgpu_ras_init() jumps to init_failed, which only
 * returns the error: nothing is unwound in this function.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	bool init_badpage;
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* sw_init is optional; a block without it still counts as sw-ready */
		if (adev->ip_blocks[i].version->funcs->sw_init) {
			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev,
					"sw_init of IP block <%s> failed %d\n",
					adev->ip_blocks[i].version->funcs->name,
					r);
				goto init_failed;
			}
		}
		adev->ip_blocks[i].status.sw = true;

		/* blocks outside the current init level get no early hw_init */
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* need to do common hw init early so everything is set up for gmc */
			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev, "hw_init %d failed %d\n", i,
					r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;
		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			/* need to do gmc hw init early so we can allocate gpu mem */
			/* Try to reserve bad pages early */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_exchange_data(adev);

			r = amdgpu_device_mem_scratch_init(adev);
			if (r) {
				dev_err(adev->dev,
					"amdgpu_mem_scratch_init failed %d\n",
					r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev, "hw_init %d failed %d\n", i,
					r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				dev_err(adev->dev,
					"amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			/* GMC is only flagged as up once writeback init succeeded too */
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (adev->gfx.mcbp) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM |
							       AMDGPU_GEM_DOMAIN_GTT,
							       AMDGPU_CSA_SIZE);
				if (r) {
					dev_err(adev->dev,
						"allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}

			r = amdgpu_seq64_init(adev);
			if (r) {
				dev_err(adev->dev, "allocate seq64 failed %d\n",
					r);
				goto init_failed;
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
	if (r)
		goto init_failed;

	/* Remaining hardware bring-up: phase1, firmware loading, then phase2 */
	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * Retired pages are loaded from the EEPROM and reserved here.
	 * This must be called after amdgpu_device_ip_hw_init_phase2() because,
	 * on some ASICs, the RAS EEPROM code relies on a fully functioning SMU
	 * for I2C communication, which is only the case at this point.
	 *
	 * amdgpu_ras_recovery_init() may fail, but the caller only cares about
	 * failures caused by a bad GPU and stops the amdgpu init process
	 * accordingly.  For other failures it still releases all resources
	 * and prints an error message rather than returning a negative value
	 * to the upper level.
	 *
	 * Note: theoretically, this should be called before any VRAM
	 * allocation so that retired pages cannot be handed out.
	 */
	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
	r = amdgpu_ras_recovery_init(adev, init_badpage);
	if (r)
		goto init_failed;

	/*
	 * In case of XGMI grab extra reference for reset domain for this device.
	 * A non-zero return from amdgpu_xgmi_add_device() is not treated as an
	 * error here; the device then simply keeps its own reset domain.
	 */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (amdgpu_xgmi_add_device(adev) == 0) {
			if (!amdgpu_sriov_vf(adev)) {
				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

				if (WARN_ON(!hive)) {
					r = -ENOENT;
					goto init_failed;
				}

				if (!hive->reset_domain ||
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
					r = -ENOENT;
					amdgpu_put_xgmi_hive(hive);
					goto init_failed;
				}

				/* Drop the early temporary reset domain we created for device */
				amdgpu_reset_put_reset_domain(adev->reset_domain);
				adev->reset_domain = hive->reset_domain;
				amdgpu_put_xgmi_hive(hive);
			}
		}
	}

	/* Runs after the reset domain switch above; the schedulers use adev->reset_domain->wq */
	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	if (adev->mman.buffer_funcs_ring->sched.ready)
		amdgpu_ttm_set_buffer_funcs_status(adev, true);

	/* Don't init kfd if whole hive need to be reset during init */
	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
		kgd2kfd_init_zone_device(adev);
		amdgpu_amdkfd_device_init(adev);
	}

	amdgpu_fru_get_product_info(adev);

	/* The result of CPER init, when it runs, becomes the return value */
	if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
		r = amdgpu_cper_init(adev);

init_failed:

	return r;
}
3330
3331/**
3332 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3333 *
3334 * @adev: amdgpu_device pointer
3335 *
3336 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
3337 * this function before a GPU reset.  If the value is retained after a
3338 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3339 */
3340static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3341{
3342	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3343}
3344
3345/**
3346 * amdgpu_device_check_vram_lost - check if vram is valid
3347 *
3348 * @adev: amdgpu_device pointer
3349 *
3350 * Checks the reset magic value written to the gart pointer in VRAM.
3351 * The driver calls this after a GPU reset to see if the contents of
3352 * VRAM is lost or now.
3353 * returns true if vram is lost, false if not.
3354 */
3355static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3356{
3357	if (memcmp(adev->gart.ptr, adev->reset_magic,
3358			AMDGPU_RESET_MAGIC_NUM))
3359		return true;
3360
3361	if (!amdgpu_in_reset(adev))
3362		return false;
3363
3364	/*
3365	 * For all ASICs with baco/mode1 reset, the VRAM is
3366	 * always assumed to be lost.
3367	 */
3368	switch (amdgpu_asic_reset_method(adev)) {
3369	case AMD_RESET_METHOD_LEGACY:
3370	case AMD_RESET_METHOD_LINK:
3371	case AMD_RESET_METHOD_BACO:
3372	case AMD_RESET_METHOD_MODE1:
3373		return true;
3374	default:
3375		return false;
3376	}
3377}
3378
3379/**
3380 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3381 *
3382 * @adev: amdgpu_device pointer
3383 * @state: clockgating state (gate or ungate)
3384 *
3385 * The list of all the hardware IPs that make up the asic is walked and the
3386 * set_clockgating_state callbacks are run.
3387 * Late initialization pass enabling clockgating for hardware IPs.
3388 * Fini or suspend, pass disabling clockgating for hardware IPs.
3389 * Returns 0 on success, negative error code on failure.
3390 */
3391
3392int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3393			       enum amd_clockgating_state state)
3394{
3395	int i, j, r;
3396
3397	if (amdgpu_emu_mode == 1)
3398		return 0;
3399
3400	for (j = 0; j < adev->num_ip_blocks; j++) {
3401		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3402		if (!adev->ip_blocks[i].status.late_initialized)
3403			continue;
3404		/* skip CG for GFX, SDMA on S0ix */
3405		if (adev->in_s0ix &&
3406		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3407		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3408			continue;
3409		/* skip CG for VCE/UVD, it's handled specially */
3410		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3411		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3412		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3413		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3414		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3415			/* enable clockgating to save power */
3416			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
3417										     state);
3418			if (r) {
3419				dev_err(adev->dev,
3420					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
3421					adev->ip_blocks[i].version->funcs->name,
3422					r);
3423				return r;
3424			}
3425		}
3426	}
3427
3428	return 0;
3429}
3430
3431int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3432			       enum amd_powergating_state state)
3433{
3434	int i, j, r;
3435
3436	if (amdgpu_emu_mode == 1)
3437		return 0;
3438
3439	for (j = 0; j < adev->num_ip_blocks; j++) {
3440		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3441		if (!adev->ip_blocks[i].status.late_initialized)
3442			continue;
3443		/* skip PG for GFX, SDMA on S0ix */
3444		if (adev->in_s0ix &&
3445		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3446		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3447			continue;
3448		/* skip CG for VCE/UVD, it's handled specially */
3449		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3450		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3451		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3452		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3453		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
3454			/* enable powergating to save power */
3455			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
3456											state);
3457			if (r) {
3458				dev_err(adev->dev,
3459					"set_powergating_state(gate) of IP block <%s> failed %d\n",
3460					adev->ip_blocks[i].version->funcs->name,
3461					r);
3462				return r;
3463			}
3464		}
3465	}
3466	return 0;
3467}
3468
3469static int amdgpu_device_enable_mgpu_fan_boost(void)
3470{
3471	struct amdgpu_gpu_instance *gpu_ins;
3472	struct amdgpu_device *adev;
3473	int i, ret = 0;
3474
3475	mutex_lock(&mgpu_info.mutex);
3476
3477	/*
3478	 * MGPU fan boost feature should be enabled
3479	 * only when there are two or more dGPUs in
3480	 * the system
3481	 */
3482	if (mgpu_info.num_dgpu < 2)
3483		goto out;
3484
3485	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3486		gpu_ins = &(mgpu_info.gpu_ins[i]);
3487		adev = gpu_ins->adev;
3488		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
3489		    !gpu_ins->mgpu_fan_enabled) {
3490			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3491			if (ret)
3492				break;
3493
3494			gpu_ins->mgpu_fan_enabled = 1;
3495		}
3496	}
3497
3498out:
3499	mutex_unlock(&mgpu_info.mutex);
3500
3501	return ret;
3502}
3503
3504/**
3505 * amdgpu_device_ip_late_init - run late init for hardware IPs
3506 *
3507 * @adev: amdgpu_device pointer
3508 *
3509 * Late initialization pass for hardware IPs.  The list of all the hardware
3510 * IPs that make up the asic is walked and the late_init callbacks are run.
3511 * late_init covers any special initialization that an IP requires
3512 * after all of the have been initialized or something that needs to happen
3513 * late in the init process.
3514 * Returns 0 on success, negative error code on failure.
3515 */
3516static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3517{
3518	struct amdgpu_gpu_instance *gpu_instance;
3519	int i = 0, r;
3520
3521	for (i = 0; i < adev->num_ip_blocks; i++) {
3522		if (!adev->ip_blocks[i].status.hw)
3523			continue;
3524		if (adev->ip_blocks[i].version->funcs->late_init) {
3525			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3526			if (r) {
3527				dev_err(adev->dev,
3528					"late_init of IP block <%s> failed %d\n",
3529					adev->ip_blocks[i].version->funcs->name,
3530					r);
3531				return r;
3532			}
3533		}
3534		adev->ip_blocks[i].status.late_initialized = true;
3535	}
3536
3537	r = amdgpu_ras_late_init(adev);
3538	if (r) {
3539		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
3540		return r;
3541	}
3542
3543	if (!amdgpu_reset_in_recovery(adev))
3544		amdgpu_ras_set_error_query_ready(adev, true);
3545
3546	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3547	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3548
3549	amdgpu_device_fill_reset_magic(adev);
3550
3551	r = amdgpu_device_enable_mgpu_fan_boost();
3552	if (r)
3553		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
3554
3555	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3556	if (amdgpu_passthrough(adev) &&
3557	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3558	     adev->asic_type == CHIP_ALDEBARAN))
3559		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3560
3561	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3562		mutex_lock(&mgpu_info.mutex);
3563
3564		/*
3565		 * Reset device p-state to low as this was booted with high.
3566		 *
3567		 * This should be performed only after all devices from the same
3568		 * hive get initialized.
3569		 *
3570		 * However, it's unknown how many device in the hive in advance.
3571		 * As this is counted one by one during devices initializations.
3572		 *
3573		 * So, we wait for all XGMI interlinked devices initialized.
3574		 * This may bring some delays as those devices may come from
3575		 * different hives. But that should be OK.
3576		 */
3577		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3578			for (i = 0; i < mgpu_info.num_gpu; i++) {
3579				gpu_instance = &(mgpu_info.gpu_ins[i]);
3580				if (gpu_instance->adev->flags & AMD_IS_APU)
3581					continue;
3582
3583				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3584						AMDGPU_XGMI_PSTATE_MIN);
3585				if (r) {
3586					dev_err(adev->dev,
3587						"pstate setting failed (%d).\n",
3588						r);
3589					break;
3590				}
3591			}
3592		}
3593
3594		mutex_unlock(&mgpu_info.mutex);
3595	}
3596
3597	return 0;
3598}
3599
3600static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3601{
3602	struct amdgpu_device *adev = ip_block->adev;
3603	int r;
3604
3605	if (!ip_block->version->funcs->hw_fini) {
3606		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
3607			ip_block->version->funcs->name);
3608	} else {
3609		r = ip_block->version->funcs->hw_fini(ip_block);
3610		/* XXX handle errors */
3611		if (r) {
3612			dev_dbg(adev->dev,
3613				"hw_fini of IP block <%s> failed %d\n",
3614				ip_block->version->funcs->name, r);
3615		}
3616	}
3617
3618	ip_block->status.hw = false;
3619}
3620
3621/**
3622 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3623 *
3624 * @adev: amdgpu_device pointer
3625 *
3626 * For ASICs need to disable SMC first
3627 */
3628static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3629{
3630	int i;
3631
3632	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3633		return;
3634
3635	for (i = 0; i < adev->num_ip_blocks; i++) {
3636		if (!adev->ip_blocks[i].status.hw)
3637			continue;
3638		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3639			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3640			break;
3641		}
3642	}
3643}
3644
3645static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3646{
3647	int i, r;
3648
3649	for (i = 0; i < adev->num_ip_blocks; i++) {
3650		if (!adev->ip_blocks[i].version->funcs->early_fini)
3651			continue;
3652
3653		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3654		if (r) {
3655			dev_dbg(adev->dev,
3656				"early_fini of IP block <%s> failed %d\n",
3657				adev->ip_blocks[i].version->funcs->name, r);
3658		}
3659	}
3660
3661	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3662	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3663
3664	amdgpu_amdkfd_suspend(adev, true);
3665	amdgpu_userq_suspend(adev);
3666
3667	/* Workaround for ASICs need to disable SMC first */
3668	amdgpu_device_smu_fini_early(adev);
3669
3670	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3671		if (!adev->ip_blocks[i].status.hw)
3672			continue;
3673
3674		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3675	}
3676
3677	if (amdgpu_sriov_vf(adev)) {
3678		if (amdgpu_virt_release_full_gpu(adev, false))
3679			dev_err(adev->dev,
3680				"failed to release exclusive mode on fini\n");
3681	}
3682
3683	/*
3684	 * Driver reload on the APU can fail due to firmware validation because
3685	 * the PSP is always running, as it is shared across the whole SoC.
3686	 * This same issue does not occur on dGPU because it has a mechanism
3687	 * that checks whether the PSP is running. A solution for those issues
3688	 * in the APU is to trigger a GPU reset, but this should be done during
3689	 * the unload phase to avoid adding boot latency and screen flicker.
3690	 */
3691	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
3692		r = amdgpu_asic_reset(adev);
3693		if (r)
3694			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
3695	}
3696
3697	return 0;
3698}
3699
3700/**
3701 * amdgpu_device_ip_fini - run fini for hardware IPs
3702 *
3703 * @adev: amdgpu_device pointer
3704 *
3705 * Main teardown pass for hardware IPs.  The list of all the hardware
3706 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3707 * are run.  hw_fini tears down the hardware associated with each IP
3708 * and sw_fini tears down any software state associated with each IP.
3709 * Returns 0 on success, negative error code on failure.
3710 */
3711static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3712{
3713	int i, r;
3714
3715	amdgpu_cper_fini(adev);
3716
3717	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3718		amdgpu_virt_release_ras_err_handler_data(adev);
3719
3720	if (adev->gmc.xgmi.num_physical_nodes > 1)
3721		amdgpu_xgmi_remove_device(adev);
3722
3723	amdgpu_amdkfd_device_fini_sw(adev);
3724
3725	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3726		if (!adev->ip_blocks[i].status.sw)
3727			continue;
3728
3729		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3730			amdgpu_ucode_free_bo(adev);
3731			amdgpu_free_static_csa(&adev->virt.csa_obj);
3732			amdgpu_device_wb_fini(adev);
3733			amdgpu_device_mem_scratch_fini(adev);
3734			amdgpu_ib_pool_fini(adev);
3735			amdgpu_seq64_fini(adev);
3736			amdgpu_doorbell_fini(adev);
3737		}
3738		if (adev->ip_blocks[i].version->funcs->sw_fini) {
3739			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3740			/* XXX handle errors */
3741			if (r) {
3742				dev_dbg(adev->dev,
3743					"sw_fini of IP block <%s> failed %d\n",
3744					adev->ip_blocks[i].version->funcs->name,
3745					r);
3746			}
3747		}
3748		adev->ip_blocks[i].status.sw = false;
3749		adev->ip_blocks[i].status.valid = false;
3750	}
3751
3752	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3753		if (!adev->ip_blocks[i].status.late_initialized)
3754			continue;
3755		if (adev->ip_blocks[i].version->funcs->late_fini)
3756			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3757		adev->ip_blocks[i].status.late_initialized = false;
3758	}
3759
3760	amdgpu_ras_fini(adev);
3761	amdgpu_uid_fini(adev);
3762
3763	return 0;
3764}
3765
3766/**
3767 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3768 *
3769 * @work: work_struct.
3770 */
3771static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3772{
3773	struct amdgpu_device *adev =
3774		container_of(work, struct amdgpu_device, delayed_init_work.work);
3775	int r;
3776
3777	r = amdgpu_ib_ring_tests(adev);
3778	if (r)
3779		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
3780}
3781
3782static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3783{
3784	struct amdgpu_device *adev =
3785		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3786
3787	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3788	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3789
3790	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
3791		adev->gfx.gfx_off_state = true;
3792}
3793
3794/**
3795 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3796 *
3797 * @adev: amdgpu_device pointer
3798 *
3799 * Main suspend function for hardware IPs.  The list of all the hardware
3800 * IPs that make up the asic is walked, clockgating is disabled and the
3801 * suspend callbacks are run.  suspend puts the hardware and software state
3802 * in each IP into a state suitable for suspend.
3803 * Returns 0 on success, negative error code on failure.
3804 */
3805static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3806{
3807	int i, r, rec;
3808
3809	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3810	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3811
3812	/*
3813	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3814	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3815	 * scenario. Add the missing df cstate disablement here.
3816	 */
3817	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3818		dev_warn(adev->dev, "Failed to disallow df cstate");
3819
3820	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3821		if (!adev->ip_blocks[i].status.valid)
3822			continue;
3823
3824		/* displays are handled separately */
3825		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3826			continue;
3827
3828		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3829		if (r)
3830			goto unwind;
3831	}
3832
3833	return 0;
3834unwind:
3835	rec = amdgpu_device_ip_resume_phase3(adev);
3836	if (rec)
3837		dev_err(adev->dev,
3838			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3839			rec);
3840
3841	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3842
3843	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3844	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3845
3846	return r;
3847}
3848
/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Second suspend pass for hardware IPs.  The list of all the hardware IPs
 * that make up the asic is walked in reverse and the suspend callbacks are
 * run for every valid block except the displays, which phase 1 already
 * handled.  Several blocks are skipped depending on the S0ix, reset and RAS
 * state; see the comments in the loop.  suspend puts the hardware and
 * software state in each IP into a state suitable for suspend.
 *
 * If a block fails to suspend (or the SMC cannot be put into the requested
 * MP1 state), the function tries to bring the device back up through resume
 * phase 1, firmware loading and resume phase 2, and then returns the
 * original suspend error.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
	int i, r, rec;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled in phase1 */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
			continue;
		/* PSP lost connection when err_event_athub occurs */
		if (amdgpu_ras_intr_triggered() &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
			/* mark PSP as down without calling its suspend callback */
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip unnecessary suspend if we do not initialize them yet */
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
			continue;

		/* Since we skip suspend for S0i3, we need to cancel the delayed
		 * idle work here as the suspend callback never gets called.
		 */
		if (adev->in_s0ix &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
			cancel_delayed_work_sync(&adev->gfx.idle_work);
		/* skip suspend of gfx/mes and psp for S0ix
		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
		 * like at runtime. PSP is also part of the always on hardware
		 * so no need to suspend it.
		 */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
			continue;

		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (adev->in_s0ix &&
		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
		     IP_VERSION(5, 0, 0)) &&
		    (adev->ip_blocks[i].version->type ==
		     AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		/* swPSP provides the IMU and RLC FW binaries to TOS once, during
		 * cold boot. They live in TMR, so PSP-TOS is expected to reuse and
		 * reload them from that location, and RLC Autoload is likewise
		 * loaded from there based on the PMFW -> PSP message during the
		 * re-init sequence.
		 * Therefore the psp suspend & resume should be skipped to avoid
		 * destroying the TMR and reloading the FWs for IMU enabled APU ASICs.
		 */
		if (amdgpu_in_reset(adev) &&
		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;

		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
		if (r)
			goto unwind;

		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
				if (r) {
					dev_err(adev->dev,
						"SMC failed to set mp1 state %d, %d\n",
						adev->mp1_state, r);
					goto unwind;
				}
			}
		}
	}

	return 0;
unwind:
	/*
	 * suspend phase 2 = resume phase 1 + resume phase 2
	 *
	 * The rollback stops at the first step that fails.  Whatever happens,
	 * the original suspend error in r is returned, not the rollback error.
	 */
	rec = amdgpu_device_ip_resume_phase1(adev);
	if (rec) {
		dev_err(adev->dev,
			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
			rec);
		return r;
	}

	rec = amdgpu_device_fw_loading(adev);
	if (rec) {
		dev_err(adev->dev,
			"amdgpu_device_fw_loading failed during unwind: %d\n",
			rec);
		return r;
	}

	rec = amdgpu_device_ip_resume_phase2(adev);
	if (rec) {
		dev_err(adev->dev,
			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
			rec);
		return r;
	}

	return r;
}
3970
3971/**
3972 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3973 *
3974 * @adev: amdgpu_device pointer
3975 *
3976 * Main suspend function for hardware IPs.  The list of all the hardware
3977 * IPs that make up the asic is walked, clockgating is disabled and the
3978 * suspend callbacks are run.  suspend puts the hardware and software state
3979 * in each IP into a state suitable for suspend.
3980 * Returns 0 on success, negative error code on failure.
3981 */
3982static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3983{
3984	int r;
3985
3986	if (amdgpu_sriov_vf(adev)) {
3987		amdgpu_virt_fini_data_exchange(adev);
3988		amdgpu_virt_request_full_gpu(adev, false);
3989	}
3990
3991	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3992
3993	r = amdgpu_device_ip_suspend_phase1(adev);
3994	if (r)
3995		return r;
3996	r = amdgpu_device_ip_suspend_phase2(adev);
3997
3998	if (amdgpu_sriov_vf(adev))
3999		amdgpu_virt_release_full_gpu(adev, false);
4000
4001	return r;
4002}
4003
4004static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
4005{
4006	int i, r;
4007
4008	static enum amd_ip_block_type ip_order[] = {
4009		AMD_IP_BLOCK_TYPE_COMMON,
4010		AMD_IP_BLOCK_TYPE_GMC,
4011		AMD_IP_BLOCK_TYPE_PSP,
4012		AMD_IP_BLOCK_TYPE_IH,
4013	};
4014
4015	for (i = 0; i < adev->num_ip_blocks; i++) {
4016		int j;
4017		struct amdgpu_ip_block *block;
4018
4019		block = &adev->ip_blocks[i];
4020		block->status.hw = false;
4021
4022		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
4023
4024			if (block->version->type != ip_order[j] ||
4025				!block->status.valid)
4026				continue;
4027
4028			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
4029			if (r) {
4030				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
4031					 block->version->funcs->name);
4032				return r;
4033			}
4034			block->status.hw = true;
4035		}
4036	}
4037
4038	return 0;
4039}
4040
4041static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
4042{
4043	struct amdgpu_ip_block *block;
4044	int i, r = 0;
4045
4046	static enum amd_ip_block_type ip_order[] = {
4047		AMD_IP_BLOCK_TYPE_SMC,
4048		AMD_IP_BLOCK_TYPE_DCE,
4049		AMD_IP_BLOCK_TYPE_GFX,
4050		AMD_IP_BLOCK_TYPE_SDMA,
4051		AMD_IP_BLOCK_TYPE_MES,
4052		AMD_IP_BLOCK_TYPE_UVD,
4053		AMD_IP_BLOCK_TYPE_VCE,
4054		AMD_IP_BLOCK_TYPE_VCN,
4055		AMD_IP_BLOCK_TYPE_JPEG
4056	};
4057
4058	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
4059		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
4060
4061		if (!block)
4062			continue;
4063
4064		if (block->status.valid && !block->status.hw) {
4065			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
4066				r = amdgpu_ip_block_resume(block);
4067			} else {
4068				r = block->version->funcs->hw_init(block);
4069			}
4070
4071			if (r) {
4072				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
4073					 block->version->funcs->name);
4074				break;
4075			}
4076			block->status.hw = true;
4077		}
4078	}
4079
4080	return r;
4081}
4082
4083/**
4084 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
4085 *
4086 * @adev: amdgpu_device pointer
4087 *
4088 * First resume function for hardware IPs.  The list of all the hardware
4089 * IPs that make up the asic is walked and the resume callbacks are run for
4090 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
4091 * after a suspend and updates the software state as necessary.  This
4092 * function is also used for restoring the GPU after a GPU reset.
4093 * Returns 0 on success, negative error code on failure.
4094 */
4095static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
4096{
4097	int i, r;
4098
4099	for (i = 0; i < adev->num_ip_blocks; i++) {
4100		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
4101			continue;
4102		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4103		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4104		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4105		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
4106
4107			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
4108			if (r)
4109				return r;
4110		}
4111	}
4112
4113	return 0;
4114}
4115
4116/**
4117 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
4118 *
4119 * @adev: amdgpu_device pointer
4120 *
4121 * Second resume function for hardware IPs.  The list of all the hardware
4122 * IPs that make up the asic is walked and the resume callbacks are run for
4123 * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
4124 * functional state after a suspend and updates the software state as
4125 * necessary.  This function is also used for restoring the GPU after a GPU
4126 * reset.
4127 * Returns 0 on success, negative error code on failure.
4128 */
4129static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
4130{
4131	int i, r;
4132
4133	for (i = 0; i < adev->num_ip_blocks; i++) {
4134		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
4135			continue;
4136		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4137		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4138		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4139		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
4140		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
4141			continue;
4142		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
4143		if (r)
4144			return r;
4145	}
4146
4147	return 0;
4148}
4149
4150/**
4151 * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
4152 *
4153 * @adev: amdgpu_device pointer
4154 *
4155 * Third resume function for hardware IPs.  The list of all the hardware
4156 * IPs that make up the asic is walked and the resume callbacks are run for
4157 * all DCE.  resume puts the hardware into a functional state after a suspend
4158 * and updates the software state as necessary.  This function is also used
4159 * for restoring the GPU after a GPU reset.
4160 *
4161 * Returns 0 on success, negative error code on failure.
4162 */
4163static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
4164{
4165	int i, r;
4166
4167	for (i = 0; i < adev->num_ip_blocks; i++) {
4168		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
4169			continue;
4170		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
4171			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
4172			if (r)
4173				return r;
4174		}
4175	}
4176
4177	return 0;
4178}
4179
4180/**
4181 * amdgpu_device_ip_resume - run resume for hardware IPs
4182 *
4183 * @adev: amdgpu_device pointer
4184 *
4185 * Main resume function for hardware IPs.  The hardware IPs
4186 * are split into two resume functions because they are
4187 * also used in recovering from a GPU reset and some additional
4188 * steps need to be take between them.  In this case (S3/S4) they are
4189 * run sequentially.
4190 * Returns 0 on success, negative error code on failure.
4191 */
4192static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
4193{
4194	int r;
4195
4196	r = amdgpu_device_ip_resume_phase1(adev);
4197	if (r)
4198		return r;
4199
4200	r = amdgpu_device_fw_loading(adev);
4201	if (r)
4202		return r;
4203
4204	r = amdgpu_device_ip_resume_phase2(adev);
4205
4206	if (adev->mman.buffer_funcs_ring->sched.ready)
4207		amdgpu_ttm_set_buffer_funcs_status(adev, true);
4208
4209	if (r)
4210		return r;
4211
4212	amdgpu_fence_driver_hw_init(adev);
4213
4214	r = amdgpu_device_ip_resume_phase3(adev);
4215
4216	return r;
4217}
4218
4219/**
4220 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
4221 *
4222 * @adev: amdgpu_device pointer
4223 *
4224 * Query the VBIOS data tables to determine if the board supports SR-IOV.
4225 */
4226static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
4227{
4228	if (amdgpu_sriov_vf(adev)) {
4229		if (adev->is_atom_fw) {
4230			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
4231				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4232		} else {
4233			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
4234				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4235		}
4236
4237		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
4238			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
4239	}
4240}
4241
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @pdev : pci device context
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * The answer depends on the ASIC type, on the amdgpu_dc module parameter and
 * on which of CONFIG_DRM_AMDGPU_SI, CONFIG_DRM_AMD_DC and
 * CONFIG_DRM_AMD_DC_SI are enabled in the build.
 *
 * returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
				       enum amd_asic_type asic_type)
{
	switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		/* Needs DC SI support built in and amdgpu_dc not set to 0. */
		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * TRAVIS and NUTMEG support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		/* Only an explicitly positive amdgpu_dc opts these ASICs in. */
		return amdgpu_dc > 0;
	default:
		/* Every other ASIC uses DC unless amdgpu_dc is exactly 0. */
		return amdgpu_dc != 0;
#else
	default:
		/*
		 * Built without CONFIG_DRM_AMD_DC: never report DC support,
		 * and log once if it was requested via amdgpu_dc > 0.
		 */
		if (amdgpu_dc > 0)
			dev_info_once(
				&pdev->dev,
				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
		return false;
#endif
	}
}
4290
4291/**
4292 * amdgpu_device_has_dc_support - check if dc is supported
4293 *
4294 * @adev: amdgpu_device pointer
4295 *
4296 * Returns true for supported, false for not supported
4297 */
4298bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
4299{
4300	if (adev->enable_virtual_display ||
4301	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
4302		return false;
4303
4304	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
4305}
4306
4307static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
4308{
4309	struct amdgpu_device *adev =
4310		container_of(__work, struct amdgpu_device, xgmi_reset_work);
4311	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
4312
4313	/* It's a bug to not have a hive within this function */
4314	if (WARN_ON(!hive))
4315		return;
4316
4317	/*
4318	 * Use task barrier to synchronize all xgmi reset works across the
4319	 * hive. task_barrier_enter and task_barrier_exit will block
4320	 * until all the threads running the xgmi reset works reach
4321	 * those points. task_barrier_full will do both blocks.
4322	 */
4323	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
4324
4325		task_barrier_enter(&hive->tb);
4326		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
4327
4328		if (adev->asic_reset_res)
4329			goto fail;
4330
4331		task_barrier_exit(&hive->tb);
4332		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
4333
4334		if (adev->asic_reset_res)
4335			goto fail;
4336
4337		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
4338	} else {
4339
4340		task_barrier_full(&hive->tb);
4341		adev->asic_reset_res =  amdgpu_asic_reset(adev);
4342	}
4343
4344fail:
4345	if (adev->asic_reset_res)
4346		dev_warn(adev->dev,
4347			 "ASIC reset failed with error, %d for drm dev, %s",
4348			 adev->asic_reset_res, adev_to_drm(adev)->unique);
4349	amdgpu_put_xgmi_hive(hive);
4350}
4351
4352static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
4353{
4354	char *input = amdgpu_lockup_timeout;
4355	char *timeout_setting = NULL;
4356	int index = 0;
4357	long timeout;
4358	int ret = 0;
4359
4360	/* By default timeout for all queues is 2 sec */
4361	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4362		adev->video_timeout = msecs_to_jiffies(2000);
4363
4364	if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
4365		return 0;
4366
4367	while ((timeout_setting = strsep(&input, ",")) &&
4368	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
4369		ret = kstrtol(timeout_setting, 0, &timeout);
4370		if (ret)
4371			return ret;
4372
4373		if (timeout == 0) {
4374			index++;
4375			continue;
4376		} else if (timeout < 0) {
4377			timeout = MAX_SCHEDULE_TIMEOUT;
4378			dev_warn(adev->dev, "lockup timeout disabled");
4379			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
4380		} else {
4381			timeout = msecs_to_jiffies(timeout);
4382		}
4383
4384		switch (index++) {
4385		case 0:
4386			adev->gfx_timeout = timeout;
4387			break;
4388		case 1:
4389			adev->compute_timeout = timeout;
4390			break;
4391		case 2:
4392			adev->sdma_timeout = timeout;
4393			break;
4394		case 3:
4395			adev->video_timeout = timeout;
4396			break;
4397		default:
4398			break;
4399		}
4400	}
4401
4402	/* When only one value specified apply it to all queues. */
4403	if (index == 1)
4404		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4405			adev->video_timeout = timeout;
4406
4407	return ret;
4408}
4409
4410/**
4411 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4412 *
4413 * @adev: amdgpu_device pointer
4414 *
4415 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
4416 */
4417static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4418{
4419	struct iommu_domain *domain;
4420
4421	domain = iommu_get_domain_for_dev(adev->dev);
4422	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4423		adev->ram_is_direct_mapped = true;
4424}
4425
4426#if defined(CONFIG_HSA_AMD_P2P)
4427/**
4428 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4429 *
4430 * @adev: amdgpu_device pointer
4431 *
4432 * return if IOMMU remapping bar address
4433 */
4434static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4435{
4436	struct iommu_domain *domain;
4437
4438	domain = iommu_get_domain_for_dev(adev->dev);
4439	if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4440		domain->type ==	IOMMU_DOMAIN_DMA_FQ))
4441		return true;
4442
4443	return false;
4444}
4445#endif
4446
4447static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4448{
4449	if (amdgpu_mcbp == 1)
4450		adev->gfx.mcbp = true;
4451	else if (amdgpu_mcbp == 0)
4452		adev->gfx.mcbp = false;
4453
4454	if (amdgpu_sriov_vf(adev))
4455		adev->gfx.mcbp = true;
4456
4457	if (adev->gfx.mcbp)
4458		dev_info(adev->dev, "MCBP is enabled\n");
4459}
4460
4461static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
4462{
4463	int r;
4464
4465	r = amdgpu_atombios_sysfs_init(adev);
4466	if (r)
4467		drm_err(&adev->ddev,
4468			"registering atombios sysfs failed (%d).\n", r);
4469
4470	r = amdgpu_pm_sysfs_init(adev);
4471	if (r)
4472		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
4473
4474	r = amdgpu_ucode_sysfs_init(adev);
4475	if (r) {
4476		adev->ucode_sysfs_en = false;
4477		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
4478	} else
4479		adev->ucode_sysfs_en = true;
4480
4481	r = amdgpu_device_attr_sysfs_init(adev);
4482	if (r)
4483		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4484
4485	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4486	if (r)
4487		dev_err(adev->dev,
4488			"Could not create amdgpu board attributes\n");
4489
4490	amdgpu_fru_sysfs_init(adev);
4491	amdgpu_reg_state_sysfs_init(adev);
4492	amdgpu_xcp_sysfs_init(adev);
4493
4494	return r;
4495}
4496
/*
 * Tear down the sysfs interfaces set up by
 * amdgpu_device_sys_interface_init().  The pm and ucode entries are only
 * removed when their "initialized"/"enabled" flags are set; the remaining
 * fini helpers are called unconditionally.
 */
static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
{
	if (adev->pm.sysfs_initialized)
		amdgpu_pm_sysfs_fini(adev);
	/* ucode_sysfs_en is set only when ucode sysfs creation succeeded. */
	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);
	amdgpu_device_attr_sysfs_fini(adev);
	amdgpu_fru_sysfs_fini(adev);

	amdgpu_reg_state_sysfs_fini(adev);
	amdgpu_xcp_sysfs_fini(adev);
}
4509
4510/**
4511 * amdgpu_device_init - initialize the driver
4512 *
4513 * @adev: amdgpu_device pointer
4514 * @flags: driver flags
4515 *
4516 * Initializes the driver info and hw (all asics).
4517 * Returns 0 for success or an error on failure.
4518 * Called at driver startup.
4519 */
4520int amdgpu_device_init(struct amdgpu_device *adev,
4521		       uint32_t flags)
4522{
4523	struct pci_dev *pdev = adev->pdev;
4524	int r, i;
4525	bool px = false;
4526	u32 max_MBps;
4527	int tmp;
4528
4529	adev->shutdown = false;
4530	adev->flags = flags;
4531
4532	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4533		adev->asic_type = amdgpu_force_asic_type;
4534	else
4535		adev->asic_type = flags & AMD_ASIC_MASK;
4536
4537	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4538	if (amdgpu_emu_mode == 1)
4539		adev->usec_timeout *= 10;
4540	adev->gmc.gart_size = 512 * 1024 * 1024;
4541	adev->accel_working = false;
4542	adev->num_rings = 0;
4543	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4544	adev->mman.buffer_funcs = NULL;
4545	adev->mman.buffer_funcs_ring = NULL;
4546	adev->vm_manager.vm_pte_funcs = NULL;
4547	adev->vm_manager.vm_pte_num_scheds = 0;
4548	adev->gmc.gmc_funcs = NULL;
4549	adev->harvest_ip_mask = 0x0;
4550	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4551	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4552
4553	adev->smc_rreg = &amdgpu_invalid_rreg;
4554	adev->smc_wreg = &amdgpu_invalid_wreg;
4555	adev->pcie_rreg = &amdgpu_invalid_rreg;
4556	adev->pcie_wreg = &amdgpu_invalid_wreg;
4557	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4558	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4559	adev->pciep_rreg = &amdgpu_invalid_rreg;
4560	adev->pciep_wreg = &amdgpu_invalid_wreg;
4561	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4562	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4563	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4564	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4565	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4566	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4567	adev->didt_rreg = &amdgpu_invalid_rreg;
4568	adev->didt_wreg = &amdgpu_invalid_wreg;
4569	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4570	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4571	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4572	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4573
4574	dev_info(
4575		adev->dev,
4576		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4577		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4578		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4579
4580	/* mutex initialization are all done here so we
4581	 * can recall function without having locking issues
4582	 */
4583	mutex_init(&adev->firmware.mutex);
4584	mutex_init(&adev->pm.mutex);
4585	mutex_init(&adev->gfx.gpu_clock_mutex);
4586	mutex_init(&adev->srbm_mutex);
4587	mutex_init(&adev->gfx.pipe_reserve_mutex);
4588	mutex_init(&adev->gfx.gfx_off_mutex);
4589	mutex_init(&adev->gfx.partition_mutex);
4590	mutex_init(&adev->grbm_idx_mutex);
4591	mutex_init(&adev->mn_lock);
4592	mutex_init(&adev->virt.vf_errors.lock);
4593	hash_init(adev->mn_hash);
4594	mutex_init(&adev->psp.mutex);
4595	mutex_init(&adev->notifier_lock);
4596	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4597	mutex_init(&adev->benchmark_mutex);
4598	mutex_init(&adev->gfx.reset_sem_mutex);
4599	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4600	mutex_init(&adev->enforce_isolation_mutex);
4601	for (i = 0; i < MAX_XCP; ++i) {
4602		adev->isolation[i].spearhead = dma_fence_get_stub();
4603		amdgpu_sync_create(&adev->isolation[i].active);
4604		amdgpu_sync_create(&adev->isolation[i].prev);
4605	}
4606	mutex_init(&adev->gfx.userq_sch_mutex);
4607	mutex_init(&adev->gfx.workload_profile_mutex);
4608	mutex_init(&adev->vcn.workload_profile_mutex);
4609
4610	amdgpu_device_init_apu_flags(adev);
4611
4612	r = amdgpu_device_check_arguments(adev);
4613	if (r)
4614		return r;
4615
4616	spin_lock_init(&adev->mmio_idx_lock);
4617	spin_lock_init(&adev->smc_idx_lock);
4618	spin_lock_init(&adev->pcie_idx_lock);
4619	spin_lock_init(&adev->uvd_ctx_idx_lock);
4620	spin_lock_init(&adev->didt_idx_lock);
4621	spin_lock_init(&adev->gc_cac_idx_lock);
4622	spin_lock_init(&adev->se_cac_idx_lock);
4623	spin_lock_init(&adev->audio_endpt_idx_lock);
4624	spin_lock_init(&adev->mm_stats.lock);
4625	spin_lock_init(&adev->virt.rlcg_reg_lock);
4626	spin_lock_init(&adev->wb.lock);
4627
4628	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
4629
4630	INIT_LIST_HEAD(&adev->reset_list);
4631
4632	INIT_LIST_HEAD(&adev->ras_list);
4633
4634	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4635
4636	xa_init(&adev->userq_doorbell_xa);
4637
4638	INIT_DELAYED_WORK(&adev->delayed_init_work,
4639			  amdgpu_device_delayed_init_work_handler);
4640	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4641			  amdgpu_device_delay_enable_gfx_off);
4642	/*
4643	 * Initialize the enforce_isolation work structures for each XCP
4644	 * partition.  This work handler is responsible for enforcing shader
4645	 * isolation on AMD GPUs.  It counts the number of emitted fences for
4646	 * each GFX and compute ring.  If there are any fences, it schedules
4647	 * the `enforce_isolation_work` to be run after a delay.  If there are
4648	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4649	 * runqueue.
4650	 */
4651	for (i = 0; i < MAX_XCP; i++) {
4652		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4653				  amdgpu_gfx_enforce_isolation_handler);
4654		adev->gfx.enforce_isolation[i].adev = adev;
4655		adev->gfx.enforce_isolation[i].xcp_id = i;
4656	}
4657
4658	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4659	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
4660
4661	adev->gfx.gfx_off_req_count = 1;
4662	adev->gfx.gfx_off_residency = 0;
4663	adev->gfx.gfx_off_entrycount = 0;
4664	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4665
4666	atomic_set(&adev->throttling_logging_enabled, 1);
4667	/*
4668	 * If throttling continues, logging will be performed every minute
4669	 * to avoid log flooding. "-1" is subtracted since the thermal
4670	 * throttling interrupt comes every second. Thus, the total logging
4671	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4672	 * for throttling interrupt) = 60 seconds.
4673	 */
4674	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4675
4676	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4677
4678	/* Registers mapping */
4679	/* TODO: block userspace mapping of io register */
4680	if (adev->asic_type >= CHIP_BONAIRE) {
4681		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4682		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4683	} else {
4684		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4685		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4686	}
4687
4688	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4689		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4690
4691	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4692	if (!adev->rmmio)
4693		return -ENOMEM;
4694
4695	dev_info(adev->dev, "register mmio base: 0x%08X\n",
4696		 (uint32_t)adev->rmmio_base);
4697	dev_info(adev->dev, "register mmio size: %u\n",
4698		 (unsigned int)adev->rmmio_size);
4699
4700	/*
4701	 * Reset domain needs to be present early, before XGMI hive discovered
4702	 * (if any) and initialized to use reset sem and in_gpu reset flag
4703	 * early on during init and before calling to RREG32.
4704	 */
4705	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4706	if (!adev->reset_domain)
4707		return -ENOMEM;
4708
4709	/* detect hw virtualization here */
4710	amdgpu_virt_init(adev);
4711
4712	amdgpu_device_get_pcie_info(adev);
4713
4714	r = amdgpu_device_get_job_timeout_settings(adev);
4715	if (r) {
4716		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4717		return r;
4718	}
4719
4720	amdgpu_device_set_mcbp(adev);
4721
4722	/*
4723	 * By default, use default mode where all blocks are expected to be
4724	 * initialized. At present a 'swinit' of blocks is required to be
4725	 * completed before the need for a different level is detected.
4726	 */
4727	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
4728	/* early init functions */
4729	r = amdgpu_device_ip_early_init(adev);
4730	if (r)
4731		return r;
4732
4733	/*
4734	 * No need to remove conflicting FBs for non-display class devices.
4735	 * This prevents the sysfb from being freed accidently.
4736	 */
4737	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
4738	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
4739		/* Get rid of things like offb */
4740		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
4741		if (r)
4742			return r;
4743	}
4744
4745	/* Enable TMZ based on IP_VERSION */
4746	amdgpu_gmc_tmz_set(adev);
4747
4748	if (amdgpu_sriov_vf(adev) &&
4749	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4750		/* VF MMIO access (except mailbox range) from CPU
4751		 * will be blocked during sriov runtime
4752		 */
4753		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4754
4755	amdgpu_gmc_noretry_set(adev);
4756	/* Need to get xgmi info early to decide the reset behavior*/
4757	if (adev->gmc.xgmi.supported) {
4758		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4759		if (r)
4760			return r;
4761	}
4762
4763	/* enable PCIE atomic ops */
4764	if (amdgpu_sriov_vf(adev)) {
4765		if (adev->virt.fw_reserve.p_pf2vf)
4766			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4767						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4768				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4769	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4770	 * internal path natively support atomics, set have_atomics_support to true.
4771	 */
4772	} else if ((adev->flags & AMD_IS_APU) &&
4773		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4774		    IP_VERSION(9, 0, 0))) {
4775		adev->have_atomics_support = true;
4776	} else {
4777		adev->have_atomics_support =
4778			!pci_enable_atomic_ops_to_root(adev->pdev,
4779					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4780					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4781	}
4782
4783	if (!adev->have_atomics_support)
4784		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4785
4786	/* doorbell bar mapping and doorbell index init*/
4787	amdgpu_doorbell_init(adev);
4788
4789	if (amdgpu_emu_mode == 1) {
4790		/* post the asic on emulation mode */
4791		emu_soc_asic_init(adev);
4792		goto fence_driver_init;
4793	}
4794
4795	amdgpu_reset_init(adev);
4796
4797	/* detect if we are with an SRIOV vbios */
4798	if (adev->bios)
4799		amdgpu_device_detect_sriov_bios(adev);
4800
4801	/* check if we need to reset the asic
4802	 *  E.g., driver was not cleanly unloaded previously, etc.
4803	 */
4804	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4805		if (adev->gmc.xgmi.num_physical_nodes) {
4806			dev_info(adev->dev, "Pending hive reset.\n");
4807			amdgpu_set_init_level(adev,
4808					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4809		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4810				   !amdgpu_device_has_display_hardware(adev)) {
4811					r = psp_gpu_reset(adev);
4812		} else {
4813				tmp = amdgpu_reset_method;
4814				/* It should do a default reset when loading or reloading the driver,
4815				 * regardless of the module parameter reset_method.
4816				 */
4817				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4818				r = amdgpu_asic_reset(adev);
4819				amdgpu_reset_method = tmp;
4820		}
4821
4822		if (r) {
4823		  dev_err(adev->dev, "asic reset on init failed\n");
4824		  goto failed;
4825		}
4826	}
4827
4828	/* Post card if necessary */
4829	if (amdgpu_device_need_post(adev)) {
4830		if (!adev->bios) {
4831			dev_err(adev->dev, "no vBIOS found\n");
4832			r = -EINVAL;
4833			goto failed;
4834		}
4835		dev_info(adev->dev, "GPU posting now...\n");
4836		r = amdgpu_device_asic_init(adev);
4837		if (r) {
4838			dev_err(adev->dev, "gpu post error!\n");
4839			goto failed;
4840		}
4841	}
4842
4843	if (adev->bios) {
4844		if (adev->is_atom_fw) {
4845			/* Initialize clocks */
4846			r = amdgpu_atomfirmware_get_clock_info(adev);
4847			if (r) {
4848				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4849				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4850				goto failed;
4851			}
4852		} else {
4853			/* Initialize clocks */
4854			r = amdgpu_atombios_get_clock_info(adev);
4855			if (r) {
4856				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4857				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4858				goto failed;
4859			}
4860			/* init i2c buses */
4861			amdgpu_i2c_init(adev);
4862		}
4863	}
4864
4865fence_driver_init:
4866	/* Fence driver */
4867	r = amdgpu_fence_driver_sw_init(adev);
4868	if (r) {
4869		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4870		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4871		goto failed;
4872	}
4873
4874	/* init the mode config */
4875	drm_mode_config_init(adev_to_drm(adev));
4876
4877	r = amdgpu_device_ip_init(adev);
4878	if (r) {
4879		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4880		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4881		goto release_ras_con;
4882	}
4883
4884	amdgpu_fence_driver_hw_init(adev);
4885
4886	dev_info(adev->dev,
4887		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4888			adev->gfx.config.max_shader_engines,
4889			adev->gfx.config.max_sh_per_se,
4890			adev->gfx.config.max_cu_per_sh,
4891			adev->gfx.cu_info.number);
4892
4893	adev->accel_working = true;
4894
4895	amdgpu_vm_check_compute_bug(adev);
4896
4897	/* Initialize the buffer migration limit. */
4898	if (amdgpu_moverate >= 0)
4899		max_MBps = amdgpu_moverate;
4900	else
4901		max_MBps = 8; /* Allow 8 MB/s. */
4902	/* Get a log2 for easy divisions. */
4903	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4904
4905	/*
4906	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4907	 * Otherwise the mgpu fan boost feature will be skipped due to the
4908	 * gpu instance is counted less.
4909	 */
4910	amdgpu_register_gpu_instance(adev);
4911
4912	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4913	 * explicit gating rather than handling it automatically.
4914	 */
4915	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4916		r = amdgpu_device_ip_late_init(adev);
4917		if (r) {
4918			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4919			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4920			goto release_ras_con;
4921		}
4922		/* must succeed. */
4923		amdgpu_ras_resume(adev);
4924		queue_delayed_work(system_wq, &adev->delayed_init_work,
4925				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4926	}
4927
4928	if (amdgpu_sriov_vf(adev)) {
4929		amdgpu_virt_release_full_gpu(adev, true);
4930		flush_delayed_work(&adev->delayed_init_work);
4931	}
4932
4933	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4934		amdgpu_xgmi_reset_on_init(adev);
4935	/*
4936	 * Place those sysfs registering after `late_init`. As some of those
4937	 * operations performed in `late_init` might affect the sysfs
4938	 * interfaces creating.
4939	 */
4940	r = amdgpu_device_sys_interface_init(adev);
4941
4942	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4943		r = amdgpu_pmu_init(adev);
4944	if (r)
4945		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4946
4947	/* Have stored pci confspace at hand for restore in sudden PCI error */
4948	if (amdgpu_device_cache_pci_state(adev->pdev))
4949		pci_restore_state(pdev);
4950
4951	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4952	/* this will fail for cards that aren't VGA class devices, just
4953	 * ignore it
4954	 */
4955	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4956		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4957
4958	px = amdgpu_device_supports_px(adev);
4959
4960	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4961				apple_gmux_detect(NULL, NULL)))
4962		vga_switcheroo_register_client(adev->pdev,
4963					       &amdgpu_switcheroo_ops, px);
4964
4965	if (px)
4966		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4967
4968	amdgpu_device_check_iommu_direct_map(adev);
4969
4970	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4971	r = register_pm_notifier(&adev->pm_nb);
4972	if (r)
4973		goto failed;
4974
4975	return 0;
4976
4977release_ras_con:
4978	if (amdgpu_sriov_vf(adev))
4979		amdgpu_virt_release_full_gpu(adev, true);
4980
4981	/* failed in exclusive mode due to timeout */
4982	if (amdgpu_sriov_vf(adev) &&
4983		!amdgpu_sriov_runtime(adev) &&
4984		amdgpu_virt_mmio_blocked(adev) &&
4985		!amdgpu_virt_wait_reset(adev)) {
4986		dev_err(adev->dev, "VF exclusive mode timeout\n");
4987		/* Don't send request since VF is inactive. */
4988		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4989		adev->virt.ops = NULL;
4990		r = -EAGAIN;
4991	}
4992	amdgpu_release_ras_context(adev);
4993
4994failed:
4995	amdgpu_vf_error_trans_all(adev);
4996
4997	return r;
4998}
4999
5000static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
5001{
5002
5003	/* Clear all CPU mappings pointing to this device */
5004	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
5005
5006	/* Unmap all mapped bars - Doorbell, registers and VRAM */
5007	amdgpu_doorbell_fini(adev);
5008
5009	iounmap(adev->rmmio);
5010	adev->rmmio = NULL;
5011	if (adev->mman.aper_base_kaddr)
5012		iounmap(adev->mman.aper_base_kaddr);
5013	adev->mman.aper_base_kaddr = NULL;
5014
5015	/* Memory manager related */
5016	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
5017		arch_phys_wc_del(adev->gmc.vram_mtrr);
5018		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
5019	}
5020}
5021
5022/**
5023 * amdgpu_device_fini_hw - tear down the driver
5024 *
5025 * @adev: amdgpu_device pointer
5026 *
5027 * Tear down the driver info (all asics).
5028 * Called at driver shutdown.
5029 */
5030void amdgpu_device_fini_hw(struct amdgpu_device *adev)
5031{
5032	dev_info(adev->dev, "amdgpu: finishing device.\n");
5033	flush_delayed_work(&adev->delayed_init_work);
5034
5035	if (adev->mman.initialized)
5036		drain_workqueue(adev->mman.bdev.wq);
5037	adev->shutdown = true;
5038
5039	unregister_pm_notifier(&adev->pm_nb);
5040
5041	/* make sure IB test finished before entering exclusive mode
5042	 * to avoid preemption on IB test
5043	 */
5044	if (amdgpu_sriov_vf(adev)) {
5045		amdgpu_virt_request_full_gpu(adev, false);
5046		amdgpu_virt_fini_data_exchange(adev);
5047	}
5048
5049	/* disable all interrupts */
5050	amdgpu_irq_disable_all(adev);
5051	if (adev->mode_info.mode_config_initialized) {
5052		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
5053			drm_helper_force_disable_all(adev_to_drm(adev));
5054		else
5055			drm_atomic_helper_shutdown(adev_to_drm(adev));
5056	}
5057	amdgpu_fence_driver_hw_fini(adev);
5058
5059	amdgpu_device_sys_interface_fini(adev);
5060
5061	/* disable ras feature must before hw fini */
5062	amdgpu_ras_pre_fini(adev);
5063
5064	amdgpu_ttm_set_buffer_funcs_status(adev, false);
5065
5066	amdgpu_device_ip_fini_early(adev);
5067
5068	amdgpu_irq_fini_hw(adev);
5069
5070	if (adev->mman.initialized)
5071		ttm_device_clear_dma_mappings(&adev->mman.bdev);
5072
5073	amdgpu_gart_dummy_page_fini(adev);
5074
5075	if (drm_dev_is_unplugged(adev_to_drm(adev)))
5076		amdgpu_device_unmap_mmio(adev);
5077
5078}
5079
5080void amdgpu_device_fini_sw(struct amdgpu_device *adev)
5081{
5082	int i, idx;
5083	bool px;
5084
5085	amdgpu_device_ip_fini(adev);
5086	amdgpu_fence_driver_sw_fini(adev);
5087	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
5088	adev->accel_working = false;
5089	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
5090	for (i = 0; i < MAX_XCP; ++i) {
5091		dma_fence_put(adev->isolation[i].spearhead);
5092		amdgpu_sync_free(&adev->isolation[i].active);
5093		amdgpu_sync_free(&adev->isolation[i].prev);
5094	}
5095
5096	amdgpu_reset_fini(adev);
5097
5098	/* free i2c buses */
5099	amdgpu_i2c_fini(adev);
5100
5101	if (adev->bios) {
5102		if (amdgpu_emu_mode != 1)
5103			amdgpu_atombios_fini(adev);
5104		amdgpu_bios_release(adev);
5105	}
5106
5107	kfree(adev->fru_info);
5108	adev->fru_info = NULL;
5109
5110	kfree(adev->xcp_mgr);
5111	adev->xcp_mgr = NULL;
5112
5113	px = amdgpu_device_supports_px(adev);
5114
5115	if (px || (!dev_is_removable(&adev->pdev->dev) &&
5116				apple_gmux_detect(NULL, NULL)))
5117		vga_switcheroo_unregister_client(adev->pdev);
5118
5119	if (px)
5120		vga_switcheroo_fini_domain_pm_ops(adev->dev);
5121
5122	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
5123		vga_client_unregister(adev->pdev);
5124
5125	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
5126
5127		iounmap(adev->rmmio);
5128		adev->rmmio = NULL;
5129		drm_dev_exit(idx);
5130	}
5131
5132	if (IS_ENABLED(CONFIG_PERF_EVENTS))
5133		amdgpu_pmu_fini(adev);
5134	if (adev->discovery.bin)
5135		amdgpu_discovery_fini(adev);
5136
5137	amdgpu_reset_put_reset_domain(adev->reset_domain);
5138	adev->reset_domain = NULL;
5139
5140	kfree(adev->pci_state);
5141	kfree(adev->pcie_reset_ctx.swds_pcistate);
5142	kfree(adev->pcie_reset_ctx.swus_pcistate);
5143}
5144
5145/**
5146 * amdgpu_device_evict_resources - evict device resources
5147 * @adev: amdgpu device object
5148 *
5149 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
5150 * of the vram memory type. Mainly used for evicting device resources
5151 * at suspend time.
5152 *
5153 */
5154static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
5155{
5156	int ret;
5157
5158	/* No need to evict vram on APUs unless going to S4 */
5159	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
5160		return 0;
5161
5162	/* No need to evict when going to S5 through S4 callbacks */
5163	if (system_state == SYSTEM_POWER_OFF)
5164		return 0;
5165
5166	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
5167	if (ret) {
5168		dev_warn(adev->dev, "evicting device resources failed\n");
5169		return ret;
5170	}
5171
5172	if (adev->in_s4) {
5173		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
5174		if (ret)
5175			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
5176	}
5177	return ret;
5178}
5179
5180/*
5181 * Suspend & resume.
5182 */
5183/**
5184 * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
5185 * @nb: notifier block
5186 * @mode: suspend mode
5187 * @data: data
5188 *
5189 * This function is called when the system is about to suspend or hibernate.
5190 * It is used to set the appropriate flags so that eviction can be optimized
5191 * in the pm prepare callback.
5192 */
5193static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
5194				     void *data)
5195{
5196	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
5197
5198	switch (mode) {
5199	case PM_HIBERNATION_PREPARE:
5200		adev->in_s4 = true;
5201		break;
5202	case PM_POST_HIBERNATION:
5203		adev->in_s4 = false;
5204		break;
5205	}
5206
5207	return NOTIFY_DONE;
5208}
5209
5210/**
5211 * amdgpu_device_prepare - prepare for device suspend
5212 *
5213 * @dev: drm dev pointer
5214 *
5215 * Prepare to put the hw in the suspend state (all asics).
5216 * Returns 0 for success or an error on failure.
5217 * Called at driver suspend.
5218 */
5219int amdgpu_device_prepare(struct drm_device *dev)
5220{
5221	struct amdgpu_device *adev = drm_to_adev(dev);
5222	int i, r;
5223
5224	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5225		return 0;
5226
5227	/* Evict the majority of BOs before starting suspend sequence */
5228	r = amdgpu_device_evict_resources(adev);
5229	if (r)
5230		return r;
5231
5232	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
5233
5234	for (i = 0; i < adev->num_ip_blocks; i++) {
5235		if (!adev->ip_blocks[i].status.valid)
5236			continue;
5237		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
5238			continue;
5239		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
5240		if (r)
5241			return r;
5242	}
5243
5244	return 0;
5245}
5246
5247/**
5248 * amdgpu_device_complete - complete power state transition
5249 *
5250 * @dev: drm dev pointer
5251 *
5252 * Undo the changes from amdgpu_device_prepare. This will be
5253 * called on all resume transitions, including those that failed.
5254 */
5255void amdgpu_device_complete(struct drm_device *dev)
5256{
5257	struct amdgpu_device *adev = drm_to_adev(dev);
5258	int i;
5259
5260	for (i = 0; i < adev->num_ip_blocks; i++) {
5261		if (!adev->ip_blocks[i].status.valid)
5262			continue;
5263		if (!adev->ip_blocks[i].version->funcs->complete)
5264			continue;
5265		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
5266	}
5267}
5268
5269/**
5270 * amdgpu_device_suspend - initiate device suspend
5271 *
5272 * @dev: drm dev pointer
5273 * @notify_clients: notify in-kernel DRM clients
5274 *
5275 * Puts the hw in the suspend state (all asics).
5276 * Returns 0 for success or an error on failure.
5277 * Called at driver suspend.
5278 */
5279int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
5280{
5281	struct amdgpu_device *adev = drm_to_adev(dev);
5282	int r, rec;
5283
5284	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5285		return 0;
5286
5287	adev->in_suspend = true;
5288
5289	if (amdgpu_sriov_vf(adev)) {
5290		if (!adev->in_runpm)
5291			amdgpu_amdkfd_suspend_process(adev);
5292		amdgpu_virt_fini_data_exchange(adev);
5293		r = amdgpu_virt_request_full_gpu(adev, false);
5294		if (r)
5295			return r;
5296	}
5297
5298	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
5299	if (r)
5300		goto unwind_sriov;
5301
5302	if (notify_clients)
5303		drm_client_dev_suspend(adev_to_drm(adev));
5304
5305	cancel_delayed_work_sync(&adev->delayed_init_work);
5306
5307	amdgpu_ras_suspend(adev);
5308
5309	r = amdgpu_device_ip_suspend_phase1(adev);
5310	if (r)
5311		goto unwind_smartshift;
5312
5313	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5314	r = amdgpu_userq_suspend(adev);
5315	if (r)
5316		goto unwind_ip_phase1;
5317
5318	r = amdgpu_device_evict_resources(adev);
5319	if (r)
5320		goto unwind_userq;
5321
5322	amdgpu_ttm_set_buffer_funcs_status(adev, false);
5323
5324	amdgpu_fence_driver_hw_fini(adev);
5325
5326	r = amdgpu_device_ip_suspend_phase2(adev);
5327	if (r)
5328		goto unwind_evict;
5329
5330	if (amdgpu_sriov_vf(adev))
5331		amdgpu_virt_release_full_gpu(adev, false);
5332
5333	return 0;
5334
5335unwind_evict:
5336	if (adev->mman.buffer_funcs_ring->sched.ready)
5337		amdgpu_ttm_set_buffer_funcs_status(adev, true);
5338	amdgpu_fence_driver_hw_init(adev);
5339
5340unwind_userq:
5341	rec = amdgpu_userq_resume(adev);
5342	if (rec) {
5343		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
5344		return r;
5345	}
5346	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5347	if (rec) {
5348		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
5349		return r;
5350	}
5351
5352unwind_ip_phase1:
5353	/* suspend phase 1 = resume phase 3 */
5354	rec = amdgpu_device_ip_resume_phase3(adev);
5355	if (rec) {
5356		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
5357		return r;
5358	}
5359
5360unwind_smartshift:
5361	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
5362	if (rec) {
5363		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
5364		return r;
5365	}
5366
5367	if (notify_clients)
5368		drm_client_dev_resume(adev_to_drm(adev));
5369
5370	amdgpu_ras_resume(adev);
5371
5372unwind_sriov:
5373	if (amdgpu_sriov_vf(adev)) {
5374		rec = amdgpu_virt_request_full_gpu(adev, true);
5375		if (rec) {
5376			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
5377			return r;
5378		}
5379	}
5380
5381	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
5382
5383	return r;
5384}
5385
5386static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
5387{
5388	int r;
5389	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
5390
5391	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
5392	 * may not work. The access could be blocked by nBIF protection as VF isn't in
5393	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
5394	 * so that QEMU reprograms MSIX table.
5395	 */
5396	amdgpu_restore_msix(adev);
5397
5398	r = adev->gfxhub.funcs->get_xgmi_info(adev);
5399	if (r)
5400		return r;
5401
5402	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
5403		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
5404
5405	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
5406	adev->vm_manager.vram_base_offset +=
5407		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
5408
5409	return 0;
5410}
5411
5412/**
5413 * amdgpu_device_resume - initiate device resume
5414 *
5415 * @dev: drm dev pointer
5416 * @notify_clients: notify in-kernel DRM clients
5417 *
5418 * Bring the hw back to operating state (all asics).
5419 * Returns 0 for success or an error on failure.
5420 * Called at driver resume.
5421 */
5422int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
5423{
5424	struct amdgpu_device *adev = drm_to_adev(dev);
5425	int r = 0;
5426
5427	if (amdgpu_sriov_vf(adev)) {
5428		r = amdgpu_virt_request_full_gpu(adev, true);
5429		if (r)
5430			return r;
5431	}
5432
5433	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
5434		r = amdgpu_virt_resume(adev);
5435		if (r)
5436			goto exit;
5437	}
5438
5439	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5440		return 0;
5441
5442	if (adev->in_s0ix)
5443		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
5444
5445	/* post card */
5446	if (amdgpu_device_need_post(adev)) {
5447		r = amdgpu_device_asic_init(adev);
5448		if (r)
5449			dev_err(adev->dev, "amdgpu asic init failed\n");
5450	}
5451
5452	r = amdgpu_device_ip_resume(adev);
5453
5454	if (r) {
5455		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
5456		goto exit;
5457	}
5458
5459	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5460	if (r)
5461		goto exit;
5462
5463	r = amdgpu_userq_resume(adev);
5464	if (r)
5465		goto exit;
5466
5467	r = amdgpu_device_ip_late_init(adev);
5468	if (r)
5469		goto exit;
5470
5471	queue_delayed_work(system_wq, &adev->delayed_init_work,
5472			   msecs_to_jiffies(AMDGPU_RESUME_MS));
5473exit:
5474	if (amdgpu_sriov_vf(adev)) {
5475		amdgpu_virt_init_data_exchange(adev);
5476		amdgpu_virt_release_full_gpu(adev, true);
5477
5478		if (!r && !adev->in_runpm)
5479			r = amdgpu_amdkfd_resume_process(adev);
5480	}
5481
5482	if (r)
5483		return r;
5484
5485	/* Make sure IB tests flushed */
5486	flush_delayed_work(&adev->delayed_init_work);
5487
5488	if (notify_clients)
5489		drm_client_dev_resume(adev_to_drm(adev));
5490
5491	amdgpu_ras_resume(adev);
5492
5493	if (adev->mode_info.num_crtc) {
5494		/*
5495		 * Most of the connector probing functions try to acquire runtime pm
5496		 * refs to ensure that the GPU is powered on when connector polling is
5497		 * performed. Since we're calling this from a runtime PM callback,
5498		 * trying to acquire rpm refs will cause us to deadlock.
5499		 *
5500		 * Since we're guaranteed to be holding the rpm lock, it's safe to
5501		 * temporarily disable the rpm helpers so this doesn't deadlock us.
5502		 */
5503#ifdef CONFIG_PM
5504		dev->dev->power.disable_depth++;
5505#endif
5506		if (!adev->dc_enabled)
5507			drm_helper_hpd_irq_event(dev);
5508		else
5509			drm_kms_helper_hotplug_event(dev);
5510#ifdef CONFIG_PM
5511		dev->dev->power.disable_depth--;
5512#endif
5513	}
5514
5515	amdgpu_vram_mgr_clear_reset_blocks(adev);
5516	adev->in_suspend = false;
5517
5518	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
5519		dev_warn(adev->dev, "smart shift update failed\n");
5520
5521	return 0;
5522}
5523
5524/**
5525 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
5526 *
5527 * @adev: amdgpu_device pointer
5528 *
5529 * The list of all the hardware IPs that make up the asic is walked and
5530 * the check_soft_reset callbacks are run.  check_soft_reset determines
5531 * if the asic is still hung or not.
5532 * Returns true if any of the IPs are still in a hung state, false if not.
5533 */
5534static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
5535{
5536	int i;
5537	bool asic_hang = false;
5538
5539	if (amdgpu_sriov_vf(adev))
5540		return true;
5541
5542	if (amdgpu_asic_need_full_reset(adev))
5543		return true;
5544
5545	for (i = 0; i < adev->num_ip_blocks; i++) {
5546		if (!adev->ip_blocks[i].status.valid)
5547			continue;
5548		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5549			adev->ip_blocks[i].status.hang =
5550				adev->ip_blocks[i].version->funcs->check_soft_reset(
5551					&adev->ip_blocks[i]);
5552		if (adev->ip_blocks[i].status.hang) {
5553			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
5554			asic_hang = true;
5555		}
5556	}
5557	return asic_hang;
5558}
5559
5560/**
5561 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5562 *
5563 * @adev: amdgpu_device pointer
5564 *
5565 * The list of all the hardware IPs that make up the asic is walked and the
5566 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
5567 * handles any IP specific hardware or software state changes that are
5568 * necessary for a soft reset to succeed.
5569 * Returns 0 on success, negative error code on failure.
5570 */
5571static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5572{
5573	int i, r = 0;
5574
5575	for (i = 0; i < adev->num_ip_blocks; i++) {
5576		if (!adev->ip_blocks[i].status.valid)
5577			continue;
5578		if (adev->ip_blocks[i].status.hang &&
5579		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5580			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5581			if (r)
5582				return r;
5583		}
5584	}
5585
5586	return 0;
5587}
5588
5589/**
5590 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5591 *
5592 * @adev: amdgpu_device pointer
5593 *
5594 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
5595 * reset is necessary to recover.
5596 * Returns true if a full asic reset is required, false if not.
5597 */
5598static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5599{
5600	int i;
5601
5602	if (amdgpu_asic_need_full_reset(adev))
5603		return true;
5604
5605	for (i = 0; i < adev->num_ip_blocks; i++) {
5606		if (!adev->ip_blocks[i].status.valid)
5607			continue;
5608		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5609		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5610		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5611		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5612		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5613			if (adev->ip_blocks[i].status.hang) {
5614				dev_info(adev->dev, "Some block need full reset!\n");
5615				return true;
5616			}
5617		}
5618	}
5619	return false;
5620}
5621
5622/**
5623 * amdgpu_device_ip_soft_reset - do a soft reset
5624 *
5625 * @adev: amdgpu_device pointer
5626 *
5627 * The list of all the hardware IPs that make up the asic is walked and the
5628 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
5629 * IP specific hardware or software state changes that are necessary to soft
5630 * reset the IP.
5631 * Returns 0 on success, negative error code on failure.
5632 */
5633static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5634{
5635	int i, r = 0;
5636
5637	for (i = 0; i < adev->num_ip_blocks; i++) {
5638		if (!adev->ip_blocks[i].status.valid)
5639			continue;
5640		if (adev->ip_blocks[i].status.hang &&
5641		    adev->ip_blocks[i].version->funcs->soft_reset) {
5642			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5643			if (r)
5644				return r;
5645		}
5646	}
5647
5648	return 0;
5649}
5650
5651/**
5652 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5653 *
5654 * @adev: amdgpu_device pointer
5655 *
5656 * The list of all the hardware IPs that make up the asic is walked and the
5657 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
5658 * handles any IP specific hardware or software state changes that are
5659 * necessary after the IP has been soft reset.
5660 * Returns 0 on success, negative error code on failure.
5661 */
5662static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5663{
5664	int i, r = 0;
5665
5666	for (i = 0; i < adev->num_ip_blocks; i++) {
5667		if (!adev->ip_blocks[i].status.valid)
5668			continue;
5669		if (adev->ip_blocks[i].status.hang &&
5670		    adev->ip_blocks[i].version->funcs->post_soft_reset)
5671			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5672		if (r)
5673			return r;
5674	}
5675
5676	return 0;
5677}
5678
5679/**
5680 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5681 *
5682 * @adev: amdgpu_device pointer
5683 * @reset_context: amdgpu reset context pointer
5684 *
5685 * do VF FLR and reinitialize Asic
5686 * return 0 means succeeded otherwise failed
5687 */
5688static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5689				     struct amdgpu_reset_context *reset_context)
5690{
5691	int r;
5692	struct amdgpu_hive_info *hive = NULL;
5693
5694	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5695		if (!amdgpu_ras_get_fed_status(adev))
5696			amdgpu_virt_ready_to_reset(adev);
5697		amdgpu_virt_wait_reset(adev);
5698		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5699		r = amdgpu_virt_request_full_gpu(adev, true);
5700	} else {
5701		r = amdgpu_virt_reset_gpu(adev);
5702	}
5703	if (r)
5704		return r;
5705
5706	amdgpu_ras_clear_err_state(adev);
5707	amdgpu_irq_gpu_reset_resume_helper(adev);
5708
5709	/* some sw clean up VF needs to do before recover */
5710	amdgpu_virt_post_reset(adev);
5711
5712	/* Resume IP prior to SMC */
5713	r = amdgpu_device_ip_reinit_early_sriov(adev);
5714	if (r)
5715		return r;
5716
5717	amdgpu_virt_init_data_exchange(adev);
5718
5719	r = amdgpu_device_fw_loading(adev);
5720	if (r)
5721		return r;
5722
5723	/* now we are okay to resume SMC/CP/SDMA */
5724	r = amdgpu_device_ip_reinit_late_sriov(adev);
5725	if (r)
5726		return r;
5727
5728	hive = amdgpu_get_xgmi_hive(adev);
5729	/* Update PSP FW topology after reset */
5730	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5731		r = amdgpu_xgmi_update_topology(hive, adev);
5732	if (hive)
5733		amdgpu_put_xgmi_hive(hive);
5734	if (r)
5735		return r;
5736
5737	r = amdgpu_ib_ring_tests(adev);
5738	if (r)
5739		return r;
5740
5741	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5742		amdgpu_inc_vram_lost(adev);
5743
5744	/* need to be called during full access so we can't do it later like
5745	 * bare-metal does.
5746	 */
5747	amdgpu_amdkfd_post_reset(adev);
5748	amdgpu_virt_release_full_gpu(adev, true);
5749
5750	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5751	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5752	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5753	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5754	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
5755	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5756		amdgpu_ras_resume(adev);
5757
5758	amdgpu_virt_ras_telemetry_post_reset(adev);
5759
5760	return 0;
5761}
5762
5763/**
5764 * amdgpu_device_has_job_running - check if there is any unfinished job
5765 *
5766 * @adev: amdgpu_device pointer
5767 *
5768 * check if there is any job running on the device when guest driver receives
5769 * FLR notification from host driver. If there are still jobs running, then
5770 * the guest driver will not respond the FLR reset. Instead, let the job hit
5771 * the timeout and guest driver then issue the reset request.
5772 */
5773bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5774{
5775	int i;
5776
5777	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5778		struct amdgpu_ring *ring = adev->rings[i];
5779
5780		if (!amdgpu_ring_sched_ready(ring))
5781			continue;
5782
5783		if (amdgpu_fence_count_emitted(ring))
5784			return true;
5785	}
5786	return false;
5787}
5788
5789/**
5790 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5791 *
5792 * @adev: amdgpu_device pointer
5793 *
5794 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5795 * a hung GPU.
5796 */
5797bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5798{
5799
5800	if (amdgpu_gpu_recovery == 0)
5801		goto disabled;
5802
5803	/* Skip soft reset check in fatal error mode */
5804	if (!amdgpu_ras_is_poison_mode_supported(adev))
5805		return true;
5806
5807	if (amdgpu_sriov_vf(adev))
5808		return true;
5809
5810	if (amdgpu_gpu_recovery == -1) {
5811		switch (adev->asic_type) {
5812#ifdef CONFIG_DRM_AMDGPU_SI
5813		case CHIP_VERDE:
5814		case CHIP_TAHITI:
5815		case CHIP_PITCAIRN:
5816		case CHIP_OLAND:
5817		case CHIP_HAINAN:
5818#endif
5819#ifdef CONFIG_DRM_AMDGPU_CIK
5820		case CHIP_KAVERI:
5821		case CHIP_KABINI:
5822		case CHIP_MULLINS:
5823#endif
5824		case CHIP_CARRIZO:
5825		case CHIP_STONEY:
5826		case CHIP_CYAN_SKILLFISH:
5827			goto disabled;
5828		default:
5829			break;
5830		}
5831	}
5832
5833	return true;
5834
5835disabled:
5836		dev_info(adev->dev, "GPU recovery disabled.\n");
5837		return false;
5838}
5839
5840int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5841{
5842	u32 i;
5843	int ret = 0;
5844
5845	if (adev->bios)
5846		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5847
5848	dev_info(adev->dev, "GPU mode1 reset\n");
5849
5850	/* Cache the state before bus master disable. The saved config space
5851	 * values are used in other cases like restore after mode-2 reset.
5852	 */
5853	amdgpu_device_cache_pci_state(adev->pdev);
5854
5855	/* disable BM */
5856	pci_clear_master(adev->pdev);
5857
5858	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5859		dev_info(adev->dev, "GPU smu mode1 reset\n");
5860		ret = amdgpu_dpm_mode1_reset(adev);
5861	} else {
5862		dev_info(adev->dev, "GPU psp mode1 reset\n");
5863		ret = psp_gpu_reset(adev);
5864	}
5865
5866	if (ret)
5867		goto mode1_reset_failed;
5868
5869	/* enable mmio access after mode 1 reset completed */
5870	adev->no_hw_access = false;
5871
5872	amdgpu_device_load_pci_state(adev->pdev);
5873	ret = amdgpu_psp_wait_for_bootloader(adev);
5874	if (ret)
5875		goto mode1_reset_failed;
5876
5877	/* wait for asic to come out of reset */
5878	for (i = 0; i < adev->usec_timeout; i++) {
5879		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5880
5881		if (memsize != 0xffffffff)
5882			break;
5883		udelay(1);
5884	}
5885
5886	if (i >= adev->usec_timeout) {
5887		ret = -ETIMEDOUT;
5888		goto mode1_reset_failed;
5889	}
5890
5891	if (adev->bios)
5892		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5893
5894	return 0;
5895
5896mode1_reset_failed:
5897	dev_err(adev->dev, "GPU mode1 reset failed\n");
5898	return ret;
5899}
5900
5901int amdgpu_device_link_reset(struct amdgpu_device *adev)
5902{
5903	int ret = 0;
5904
5905	dev_info(adev->dev, "GPU link reset\n");
5906
5907	if (!amdgpu_reset_in_dpc(adev))
5908		ret = amdgpu_dpm_link_reset(adev);
5909
5910	if (ret)
5911		goto link_reset_failed;
5912
5913	ret = amdgpu_psp_wait_for_bootloader(adev);
5914	if (ret)
5915		goto link_reset_failed;
5916
5917	return 0;
5918
5919link_reset_failed:
5920	dev_err(adev->dev, "GPU link reset failed\n");
5921	return ret;
5922}
5923
5924int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5925				 struct amdgpu_reset_context *reset_context)
5926{
5927	int i, r = 0;
5928	struct amdgpu_job *job = NULL;
5929	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5930	bool need_full_reset =
5931		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5932
5933	if (reset_context->reset_req_dev == adev)
5934		job = reset_context->job;
5935
5936	if (amdgpu_sriov_vf(adev))
5937		amdgpu_virt_pre_reset(adev);
5938
5939	amdgpu_fence_driver_isr_toggle(adev, true);
5940
5941	/* block all schedulers and reset given job's ring */
5942	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5943		struct amdgpu_ring *ring = adev->rings[i];
5944
5945		if (!amdgpu_ring_sched_ready(ring))
5946			continue;
5947
5948		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5949		amdgpu_fence_driver_force_completion(ring);
5950	}
5951
5952	amdgpu_fence_driver_isr_toggle(adev, false);
5953
5954	if (job && job->vm)
5955		drm_sched_increase_karma(&job->base);
5956
5957	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5958	/* If reset handler not implemented, continue; otherwise return */
5959	if (r == -EOPNOTSUPP)
5960		r = 0;
5961	else
5962		return r;
5963
5964	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5965	if (!amdgpu_sriov_vf(adev)) {
5966
5967		if (!need_full_reset)
5968			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5969
5970		if (!need_full_reset && amdgpu_gpu_recovery &&
5971		    amdgpu_device_ip_check_soft_reset(adev)) {
5972			amdgpu_device_ip_pre_soft_reset(adev);
5973			r = amdgpu_device_ip_soft_reset(adev);
5974			amdgpu_device_ip_post_soft_reset(adev);
5975			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5976				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5977				need_full_reset = true;
5978			}
5979		}
5980
5981		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5982			dev_info(tmp_adev->dev, "Dumping IP State\n");
5983			/* Trigger ip dump before we reset the asic */
5984			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5985				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5986					tmp_adev->ip_blocks[i].version->funcs
5987						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5988			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5989		}
5990
5991		if (need_full_reset)
5992			r = amdgpu_device_ip_suspend(adev);
5993		if (need_full_reset)
5994			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5995		else
5996			clear_bit(AMDGPU_NEED_FULL_RESET,
5997				  &reset_context->flags);
5998	}
5999
6000	return r;
6001}
6002
6003int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
6004{
6005	struct list_head *device_list_handle;
6006	bool full_reset, vram_lost = false;
6007	struct amdgpu_device *tmp_adev;
6008	int r, init_level;
6009
6010	device_list_handle = reset_context->reset_device_list;
6011
6012	if (!device_list_handle)
6013		return -EINVAL;
6014
6015	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6016
6017	/**
6018	 * If it's reset on init, it's default init level, otherwise keep level
6019	 * as recovery level.
6020	 */
6021	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
6022			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
6023	else
6024			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
6025
6026	r = 0;
6027	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6028		amdgpu_set_init_level(tmp_adev, init_level);
6029		if (full_reset) {
6030			/* post card */
6031			amdgpu_reset_set_dpc_status(tmp_adev, false);
6032			amdgpu_ras_clear_err_state(tmp_adev);
6033			r = amdgpu_device_asic_init(tmp_adev);
6034			if (r) {
6035				dev_warn(tmp_adev->dev, "asic atom init failed!");
6036			} else {
6037				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
6038
6039				r = amdgpu_device_ip_resume_phase1(tmp_adev);
6040				if (r)
6041					goto out;
6042
6043				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
6044
6045				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
6046					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
6047
6048				if (vram_lost) {
6049					dev_info(
6050						tmp_adev->dev,
6051						"VRAM is lost due to GPU reset!\n");
6052					amdgpu_inc_vram_lost(tmp_adev);
6053				}
6054
6055				r = amdgpu_device_fw_loading(tmp_adev);
6056				if (r)
6057					return r;
6058
6059				r = amdgpu_xcp_restore_partition_mode(
6060					tmp_adev->xcp_mgr);
6061				if (r)
6062					goto out;
6063
6064				r = amdgpu_device_ip_resume_phase2(tmp_adev);
6065				if (r)
6066					goto out;
6067
6068				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
6069					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
6070
6071				r = amdgpu_device_ip_resume_phase3(tmp_adev);
6072				if (r)
6073					goto out;
6074
6075				if (vram_lost)
6076					amdgpu_device_fill_reset_magic(tmp_adev);
6077
6078				/*
6079				 * Add this ASIC as tracked as reset was already
6080				 * complete successfully.
6081				 */
6082				amdgpu_register_gpu_instance(tmp_adev);
6083
6084				if (!reset_context->hive &&
6085				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
6086					amdgpu_xgmi_add_device(tmp_adev);
6087
6088				r = amdgpu_device_ip_late_init(tmp_adev);
6089				if (r)
6090					goto out;
6091
6092				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
6093				if (r)
6094					goto out;
6095
6096				drm_client_dev_resume(adev_to_drm(tmp_adev));
6097
6098				/*
6099				 * The GPU enters bad state once faulty pages
6100				 * by ECC has reached the threshold, and ras
6101				 * recovery is scheduled next. So add one check
6102				 * here to break recovery if it indeed exceeds
6103				 * bad page threshold, and remind user to
6104				 * retire this GPU or setting one bigger
6105				 * bad_page_threshold value to fix this once
6106				 * probing driver again.
6107				 */
6108				if (!amdgpu_ras_is_rma(tmp_adev)) {
6109					/* must succeed. */
6110					amdgpu_ras_resume(tmp_adev);
6111				} else {
6112					r = -EINVAL;
6113					goto out;
6114				}
6115
6116				/* Update PSP FW topology after reset */
6117				if (reset_context->hive &&
6118				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
6119					r = amdgpu_xgmi_update_topology(
6120						reset_context->hive, tmp_adev);
6121			}
6122		}
6123
6124out:
6125		if (!r) {
6126			/* IP init is complete now, set level as default */
6127			amdgpu_set_init_level(tmp_adev,
6128					      AMDGPU_INIT_LEVEL_DEFAULT);
6129			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
6130			r = amdgpu_ib_ring_tests(tmp_adev);
6131			if (r) {
6132				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
6133				r = -EAGAIN;
6134				goto end;
6135			}
6136		}
6137
6138		if (r)
6139			tmp_adev->asic_reset_res = r;
6140	}
6141
6142end:
6143	return r;
6144}
6145
6146int amdgpu_do_asic_reset(struct list_head *device_list_handle,
6147			 struct amdgpu_reset_context *reset_context)
6148{
6149	struct amdgpu_device *tmp_adev = NULL;
6150	bool need_full_reset, skip_hw_reset;
6151	int r = 0;
6152
6153	/* Try reset handler method first */
6154	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
6155				    reset_list);
6156
6157	reset_context->reset_device_list = device_list_handle;
6158	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
6159	/* If reset handler not implemented, continue; otherwise return */
6160	if (r == -EOPNOTSUPP)
6161		r = 0;
6162	else
6163		return r;
6164
6165	/* Reset handler not implemented, use the default method */
6166	need_full_reset =
6167		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6168	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
6169
6170	/*
6171	 * ASIC reset has to be done on all XGMI hive nodes ASAP
6172	 * to allow proper links negotiation in FW (within 1 sec)
6173	 */
6174	if (!skip_hw_reset && need_full_reset) {
6175		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6176			/* For XGMI run all resets in parallel to speed up the process */
6177			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
6178				if (!queue_work(system_unbound_wq,
6179						&tmp_adev->xgmi_reset_work))
6180					r = -EALREADY;
6181			} else
6182				r = amdgpu_asic_reset(tmp_adev);
6183
6184			if (r) {
6185				dev_err(tmp_adev->dev,
6186					"ASIC reset failed with error, %d for drm dev, %s",
6187					r, adev_to_drm(tmp_adev)->unique);
6188				goto out;
6189			}
6190		}
6191
6192		/* For XGMI wait for all resets to complete before proceed */
6193		if (!r) {
6194			list_for_each_entry(tmp_adev, device_list_handle,
6195					    reset_list) {
6196				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
6197					flush_work(&tmp_adev->xgmi_reset_work);
6198					r = tmp_adev->asic_reset_res;
6199					if (r)
6200						break;
6201				}
6202			}
6203		}
6204	}
6205
6206	if (!r && amdgpu_ras_intr_triggered()) {
6207		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6208			amdgpu_ras_reset_error_count(tmp_adev,
6209						     AMDGPU_RAS_BLOCK__MMHUB);
6210		}
6211
6212		amdgpu_ras_intr_cleared();
6213	}
6214
6215	r = amdgpu_device_reinit_after_reset(reset_context);
6216	if (r == -EAGAIN)
6217		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6218	else
6219		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6220
6221out:
6222	return r;
6223}
6224
6225static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
6226{
6227
6228	switch (amdgpu_asic_reset_method(adev)) {
6229	case AMD_RESET_METHOD_MODE1:
6230	case AMD_RESET_METHOD_LINK:
6231		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
6232		break;
6233	case AMD_RESET_METHOD_MODE2:
6234		adev->mp1_state = PP_MP1_STATE_RESET;
6235		break;
6236	default:
6237		adev->mp1_state = PP_MP1_STATE_NONE;
6238		break;
6239	}
6240}
6241
/*
 * Counterpart of amdgpu_device_set_mp1_state(): calls
 * amdgpu_vf_error_trans_all() for the device and then puts
 * adev->mp1_state back to PP_MP1_STATE_NONE.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	/* NOTE(review): presumably forwards the recorded VF errors - confirm */
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
6247
6248static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
6249{
6250	struct pci_dev *p = NULL;
6251
6252	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
6253			adev->pdev->bus->number, 1);
6254	if (p) {
6255		pm_runtime_enable(&(p->dev));
6256		pm_runtime_resume(&(p->dev));
6257	}
6258
6259	pci_dev_put(p);
6260}
6261
6262static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
6263{
6264	enum amd_reset_method reset_method;
6265	struct pci_dev *p = NULL;
6266	u64 expires;
6267
6268	/*
6269	 * For now, only BACO and mode1 reset are confirmed
6270	 * to suffer the audio issue without proper suspended.
6271	 */
6272	reset_method = amdgpu_asic_reset_method(adev);
6273	if ((reset_method != AMD_RESET_METHOD_BACO) &&
6274	     (reset_method != AMD_RESET_METHOD_MODE1))
6275		return -EINVAL;
6276
6277	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
6278			adev->pdev->bus->number, 1);
6279	if (!p)
6280		return -ENODEV;
6281
6282	expires = pm_runtime_autosuspend_expiration(&(p->dev));
6283	if (!expires)
6284		/*
6285		 * If we cannot get the audio device autosuspend delay,
6286		 * a fixed 4S interval will be used. Considering 3S is
6287		 * the audio controller default autosuspend delay setting.
6288		 * 4S used here is guaranteed to cover that.
6289		 */
6290		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
6291
6292	while (!pm_runtime_status_suspended(&(p->dev))) {
6293		if (!pm_runtime_suspend(&(p->dev)))
6294			break;
6295
6296		if (expires < ktime_get_mono_fast_ns()) {
6297			dev_warn(adev->dev, "failed to suspend display audio\n");
6298			pci_dev_put(p);
6299			/* TODO: abort the succeeding gpu reset? */
6300			return -ETIMEDOUT;
6301		}
6302	}
6303
6304	pm_runtime_disable(&(p->dev));
6305
6306	pci_dev_put(p);
6307	return 0;
6308}
6309
/*
 * Cancel the reset work items of @adev that may have been queued by other
 * reset sources: the debugfs reset work (bare metal only, and only when
 * CONFIG_DEBUG_FS is set), the user queue reset work, the KFD reset work
 * (when a KFD device exists), the FLR work (SR-IOV VF only) and the RAS
 * recovery work (when a RAS context exists and RAS is enabled).
 */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

#if defined(CONFIG_DEBUG_FS)
	if (!amdgpu_sriov_vf(adev))
		cancel_work(&adev->reset_work);
#endif
	cancel_work(&adev->userq_reset_work);

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	/* con may be NULL, so it is tested before its work item is touched */
	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);

}
6330
6331static int amdgpu_device_health_check(struct list_head *device_list_handle)
6332{
6333	struct amdgpu_device *tmp_adev;
6334	int ret = 0;
6335
6336	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6337		ret |= amdgpu_device_bus_status_check(tmp_adev);
6338	}
6339
6340	return ret;
6341}
6342
6343static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
6344					  struct list_head *device_list,
6345					  struct amdgpu_hive_info *hive)
6346{
6347	struct amdgpu_device *tmp_adev = NULL;
6348
6349	/*
6350	 * Build list of devices to reset.
6351	 * In case we are in XGMI hive mode, resort the device list
6352	 * to put adev in the 1st position.
6353	 */
6354	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
6355		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6356			list_add_tail(&tmp_adev->reset_list, device_list);
6357			if (adev->shutdown)
6358				tmp_adev->shutdown = true;
6359			if (amdgpu_reset_in_dpc(adev))
6360				tmp_adev->pcie_reset_ctx.in_link_reset = true;
6361		}
6362		if (!list_is_first(&adev->reset_list, device_list))
6363			list_rotate_to_front(&adev->reset_list, device_list);
6364	} else {
6365		list_add_tail(&adev->reset_list, device_list);
6366	}
6367}
6368
6369static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
6370						  struct list_head *device_list)
6371{
6372	struct amdgpu_device *tmp_adev = NULL;
6373
6374	if (list_empty(device_list))
6375		return;
6376	tmp_adev =
6377		list_first_entry(device_list, struct amdgpu_device, reset_list);
6378	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
6379}
6380
6381static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
6382						  struct list_head *device_list)
6383{
6384	struct amdgpu_device *tmp_adev = NULL;
6385
6386	if (list_empty(device_list))
6387		return;
6388	tmp_adev =
6389		list_first_entry(device_list, struct amdgpu_device, reset_list);
6390	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
6391}
6392
/*
 * Quiesce every device on @device_list ahead of a reset.
 *
 * For each device, in this order: set the MP1 state, try to suspend the
 * display audio function (remembering success in pcie_reset_ctx), mark RAS
 * error queries as not ready, cancel the delayed init work, run the KFD and
 * user queue pre-reset hooks, unregister the GPU instance, suspend the DRM
 * clients, optionally suspend RAS, stop the scheduler of every ready ring
 * and finally bump the device's gpu_reset_counter.
 *
 * @job is the job handed to drm_sched_stop() (may be NULL). When
 * @need_emergency_restart is set, all jobs on each stopped scheduler are
 * stopped as well and RAS is not suspended. @hive is not used here.
 */
static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
					  struct amdgpu_job *job,
					  struct amdgpu_reset_context *reset_context,
					  struct list_head *device_list,
					  struct amdgpu_hive_info *hive,
					  bool need_emergency_restart)
{
	struct amdgpu_device *tmp_adev = NULL;
	int i;

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list, reset_list) {
		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * Due to the power domain of the graphics device
		 * is shared with AZ power domain. Without this,
		 * we may change the audio hardware from behind
		 * the audio driver's back. That will trigger
		 * some audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			tmp_adev->pcie_reset_ctx.audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);

		/*
		 * Mark these ASICs to be reset as untracked first
		 * And add them back after reset completed
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_client_dev_suspend(adev_to_drm(tmp_adev));

		/*
		 * disable ras on ALL IPs
		 *
		 * NOTE(review): the DPC test looks at adev (the device that
		 * started the recovery), not at tmp_adev - confirm intended.
		 */
		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
		    amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		amdgpu_userq_pre_reset(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			/* rings without a ready scheduler are skipped */
			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}
}
6455
6456static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
6457			      struct list_head *device_list,
6458			      struct amdgpu_reset_context *reset_context)
6459{
6460	struct amdgpu_device *tmp_adev = NULL;
6461	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
6462	int r = 0;
6463
6464retry:	/* Rest of adevs pre asic reset from XGMI hive. */
6465	list_for_each_entry(tmp_adev, device_list, reset_list) {
6466		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
6467		/*TODO Should we stop ?*/
6468		if (r) {
6469			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
6470				  r, adev_to_drm(tmp_adev)->unique);
6471			tmp_adev->asic_reset_res = r;
6472		}
6473	}
6474
6475	/* Actual ASIC resets if needed.*/
6476	/* Host driver will handle XGMI hive reset for SRIOV */
6477	if (amdgpu_sriov_vf(adev)) {
6478
6479		/* Bail out of reset early */
6480		if (amdgpu_ras_is_rma(adev))
6481			return -ENODEV;
6482
6483		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
6484			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
6485			amdgpu_ras_set_fed(adev, true);
6486			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
6487		}
6488
6489		r = amdgpu_device_reset_sriov(adev, reset_context);
6490		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
6491			amdgpu_virt_release_full_gpu(adev, true);
6492			goto retry;
6493		}
6494		if (r)
6495			adev->asic_reset_res = r;
6496	} else {
6497		r = amdgpu_do_asic_reset(device_list, reset_context);
6498		if (r && r == -EAGAIN)
6499			goto retry;
6500	}
6501
6502	list_for_each_entry(tmp_adev, device_list, reset_list) {
6503		/*
6504		 * Drop any pending non scheduler resets queued before reset is done.
6505		 * Any reset scheduled after this point would be valid. Scheduler resets
6506		 * were already dropped during drm_sched_stop and no new ones can come
6507		 * in before drm_sched_start.
6508		 */
6509		amdgpu_device_stop_pending_resets(tmp_adev);
6510	}
6511
6512	return r;
6513}
6514
6515static int amdgpu_device_sched_resume(struct list_head *device_list,
6516			      struct amdgpu_reset_context *reset_context,
6517			      bool   job_signaled)
6518{
6519	struct amdgpu_device *tmp_adev = NULL;
6520	int i, r = 0;
6521
6522	/* Post ASIC reset for all devs .*/
6523	list_for_each_entry(tmp_adev, device_list, reset_list) {
6524
6525		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6526			struct amdgpu_ring *ring = tmp_adev->rings[i];
6527
6528			if (!amdgpu_ring_sched_ready(ring))
6529				continue;
6530
6531			drm_sched_start(&ring->sched, 0);
6532		}
6533
6534		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
6535			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
6536
6537		if (tmp_adev->asic_reset_res) {
6538			/* bad news, how to tell it to userspace ?
6539			 * for ras error, we should report GPU bad status instead of
6540			 * reset failure
6541			 */
6542			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
6543			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
6544				dev_info(
6545					tmp_adev->dev,
6546					"GPU reset(%d) failed with error %d \n",
6547					atomic_read(
6548						&tmp_adev->gpu_reset_counter),
6549					tmp_adev->asic_reset_res);
6550			amdgpu_vf_error_put(tmp_adev,
6551					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
6552					    tmp_adev->asic_reset_res);
6553			if (!r)
6554				r = tmp_adev->asic_reset_res;
6555			tmp_adev->asic_reset_res = 0;
6556		} else {
6557			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
6558				 atomic_read(&tmp_adev->gpu_reset_counter));
6559			if (amdgpu_acpi_smart_shift_update(tmp_adev,
6560							   AMDGPU_SS_DEV_D0))
6561				dev_warn(tmp_adev->dev,
6562					 "smart shift update failed\n");
6563		}
6564	}
6565
6566	return r;
6567}
6568
6569static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
6570			      struct list_head *device_list,
6571			      bool   need_emergency_restart)
6572{
6573	struct amdgpu_device *tmp_adev = NULL;
6574
6575	list_for_each_entry(tmp_adev, device_list, reset_list) {
6576		/* unlock kfd: SRIOV would do it separately */
6577		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
6578			amdgpu_amdkfd_post_reset(tmp_adev);
6579
6580		/* kfd_post_reset will do nothing if kfd device is not initialized,
6581		 * need to bring up kfd here if it's not be initialized before
6582		 */
6583		if (!adev->kfd.init_complete)
6584			amdgpu_amdkfd_device_init(adev);
6585
6586		if (tmp_adev->pcie_reset_ctx.audio_suspended)
6587			amdgpu_device_resume_display_audio(tmp_adev);
6588
6589		amdgpu_device_unset_mp1_state(tmp_adev);
6590
6591		amdgpu_ras_set_error_query_ready(tmp_adev, true);
6592
6593	}
6594}
6595
6596
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, may be NULL
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * Nothing is done, and 0 is returned, when a RAS error is pending on bare
 * metal and the request does not come from RAS itself. The final result is
 * also stored in the reset domain's reset_res.
 *
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	int r = 0;
	bool need_emergency_restart = false;
	/* save the pasid here as the job may be freed before the end of the reset */
	int pasid = job ? job->pasid : -EINVAL;

	/*
	 * If it reaches here because of hang/timeout and a RAS error is
	 * detected at the same time, let RAS recovery take care of it.
	 */
	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
	    !amdgpu_sriov_vf(adev) &&
	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
		dev_dbg(adev->dev,
			"Gpu recovery from source: %d yielding to RAS error recovery handling",
			reset_context->src);
		return 0;
	}

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		dev_warn(adev->dev, "Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
		 need_emergency_restart ? "jobs stop" : "reset",
		 reset_context->src);

	/* The hive lock is held for the whole recovery (bare metal only) */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	INIT_LIST_HEAD(&device_list);

	amdgpu_device_recovery_prepare(adev, &device_list, hive);

	/* A failed bus check aborts before anything has been halted or locked */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(&device_list);
		if (r)
			goto end_reset;
	}

	/* Cannot be called after locking reset domain */
	amdgpu_ras_pre_reset(adev, &device_list);

	/* We need to lock reset domain only once both for XGMI and single device */
	amdgpu_device_recovery_get_reset_lock(adev, &device_list);

	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
				      hive, need_emergency_restart);
	/* Emergency restart: neither HW reset nor scheduler restart is done */
	if (need_emergency_restart)
		goto skip_sched_resume;
	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/* On failure below, amdgpu_device_gpu_resume() is not called */
	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
	if (r)
		goto reset_unlock;
skip_hw_reset:
	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
	if (r)
		goto reset_unlock;
skip_sched_resume:
	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	amdgpu_ras_post_reset(adev, &device_list);
end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	/* Publish the outcome through the reset domain */
	atomic_set(&adev->reset_domain->reset_res, r);

	/* On success, send a wedged event with recovery method "none" */
	if (!r) {
		struct amdgpu_task_info *ti = NULL;

		/*
		 * The job may already be freed at this point via the sched tdr workqueue so
		 * use the cached pasid.
		 */
		if (pasid >= 0)
			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);

		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
				     ti ? &ti->task : NULL);

		/* called with ti == NULL too when no task info was looked up */
		amdgpu_vm_put_task_info(ti);
	}

	return r;
}
6735
6736/**
6737 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6738 *
6739 * @adev: amdgpu_device pointer
6740 * @speed: pointer to the speed of the link
6741 * @width: pointer to the width of the link
6742 *
6743 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6744 * first physical partner to an AMD dGPU.
6745 * This will exclude any virtual switches and links.
6746 */
6747static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6748					    enum pci_bus_speed *speed,
6749					    enum pcie_link_width *width)
6750{
6751	struct pci_dev *parent = adev->pdev;
6752
6753	if (!speed || !width)
6754		return;
6755
6756	*speed = PCI_SPEED_UNKNOWN;
6757	*width = PCIE_LNK_WIDTH_UNKNOWN;
6758
6759	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6760		while ((parent = pci_upstream_bridge(parent))) {
6761			/* skip upstream/downstream switches internal to dGPU*/
6762			if (parent->vendor == PCI_VENDOR_ID_ATI)
6763				continue;
6764			*speed = pcie_get_speed_cap(parent);
6765			*width = pcie_get_width_cap(parent);
6766			break;
6767		}
6768	} else {
6769		/* use the current speeds rather than max if switching is not supported */
6770		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6771	}
6772}
6773
6774/**
6775 * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
6776 *
6777 * @adev: amdgpu_device pointer
6778 * @speed: pointer to the speed of the link
6779 * @width: pointer to the width of the link
6780 *
6781 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6782 * AMD dGPU which may be a virtual upstream bridge.
6783 */
6784static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
6785					enum pci_bus_speed *speed,
6786					enum pcie_link_width *width)
6787{
6788	struct pci_dev *parent = adev->pdev;
6789
6790	if (!speed || !width)
6791		return;
6792
6793	parent = pci_upstream_bridge(parent);
6794	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
6795		/* use the upstream/downstream switches internal to dGPU */
6796		*speed = pcie_get_speed_cap(parent);
6797		*width = pcie_get_width_cap(parent);
6798		while ((parent = pci_upstream_bridge(parent))) {
6799			if (parent->vendor == PCI_VENDOR_ID_ATI) {
6800				/* use the upstream/downstream switches internal to dGPU */
6801				*speed = pcie_get_speed_cap(parent);
6802				*width = pcie_get_width_cap(parent);
6803			}
6804		}
6805	} else {
6806		/* use the device itself */
6807		*speed = pcie_get_speed_cap(adev->pdev);
6808		*width = pcie_get_width_cap(adev->pdev);
6809	}
6810}
6811
6812/**
6813 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
6814 *
6815 * @adev: amdgpu_device pointer
6816 *
6817 * Fetches and stores in the driver the PCIE capabilities (gen speed
6818 * and lanes) of the slot the device is in. Handles APUs and
6819 * virtualized environments where PCIE config space may not be available.
6820 */
6821static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6822{
6823	enum pci_bus_speed speed_cap, platform_speed_cap;
6824	enum pcie_link_width platform_link_width, link_width;
6825
6826	if (amdgpu_pcie_gen_cap)
6827		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6828
6829	if (amdgpu_pcie_lane_cap)
6830		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6831
6832	/* covers APUs as well */
6833	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6834		if (adev->pm.pcie_gen_mask == 0)
6835			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6836		if (adev->pm.pcie_mlw_mask == 0)
6837			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6838		return;
6839	}
6840
6841	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6842		return;
6843
6844	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6845					&platform_link_width);
6846	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6847
6848	if (adev->pm.pcie_gen_mask == 0) {
6849		/* asic caps */
6850		if (speed_cap == PCI_SPEED_UNKNOWN) {
6851			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6852						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6853						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6854		} else {
6855			if (speed_cap == PCIE_SPEED_32_0GT)
6856				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6857							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6858							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6859							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6860							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6861			else if (speed_cap == PCIE_SPEED_16_0GT)
6862				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6863							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6864							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6865							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6866			else if (speed_cap == PCIE_SPEED_8_0GT)
6867				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6868							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6869							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6870			else if (speed_cap == PCIE_SPEED_5_0GT)
6871				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6872							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6873			else
6874				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6875		}
6876		/* platform caps */
6877		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6878			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6879						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6880		} else {
6881			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6882				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6883							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6884							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6885							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6886							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6887			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6888				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6889							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6890							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6891							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6892			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6893				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6894							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6895							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6896			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6897				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6898							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6899			else
6900				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6901
6902		}
6903	}
6904	if (adev->pm.pcie_mlw_mask == 0) {
6905		/* asic caps */
6906		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6907			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6908		} else {
6909			switch (link_width) {
6910			case PCIE_LNK_X32:
6911				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6912							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6913							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6914							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6915							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6916							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6917							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6918				break;
6919			case PCIE_LNK_X16:
6920				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6921							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6922							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6923							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6924							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6925							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6926				break;
6927			case PCIE_LNK_X12:
6928				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6929							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6930							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6931							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6932							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6933				break;
6934			case PCIE_LNK_X8:
6935				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6936							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6937							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6938							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6939				break;
6940			case PCIE_LNK_X4:
6941				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6942							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6943							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6944				break;
6945			case PCIE_LNK_X2:
6946				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6947							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6948				break;
6949			case PCIE_LNK_X1:
6950				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6951				break;
6952			default:
6953				break;
6954			}
6955		}
6956		/* platform caps */
6957		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6958			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6959		} else {
6960			switch (platform_link_width) {
6961			case PCIE_LNK_X32:
6962				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6963							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6964							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6965							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6966							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6967							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6968							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6969				break;
6970			case PCIE_LNK_X16:
6971				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6972							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6973							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6974							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6975							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6976							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6977				break;
6978			case PCIE_LNK_X12:
6979				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6980							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6981							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6982							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6983							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6984				break;
6985			case PCIE_LNK_X8:
6986				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6987							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6988							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6989							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6990				break;
6991			case PCIE_LNK_X4:
6992				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6993							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6994							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6995				break;
6996			case PCIE_LNK_X2:
6997				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6998							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6999				break;
7000			case PCIE_LNK_X1:
7001				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
7002				break;
7003			default:
7004				break;
7005			}
7006		}
7007	}
7008}
7009
7010/**
7011 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
7012 *
7013 * @adev: amdgpu_device pointer
7014 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
7015 *
7016 * Return true if @peer_adev can access (DMA) @adev through the PCIe
7017 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
7018 * @peer_adev.
7019 */
7020bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
7021				      struct amdgpu_device *peer_adev)
7022{
7023#ifdef CONFIG_HSA_AMD_P2P
7024	bool p2p_access =
7025		!adev->gmc.xgmi.connected_to_cpu &&
7026		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
7027	if (!p2p_access)
7028		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
7029			pci_name(peer_adev->pdev));
7030
7031	bool is_large_bar = adev->gmc.visible_vram_size &&
7032		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
7033	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
7034
7035	if (!p2p_addressable) {
7036		uint64_t address_mask = peer_adev->dev->dma_mask ?
7037			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
7038		resource_size_t aper_limit =
7039			adev->gmc.aper_base + adev->gmc.aper_size - 1;
7040
7041		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
7042				     aper_limit & address_mask);
7043	}
7044	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
7045#else
7046	return false;
7047#endif
7048}
7049
7050int amdgpu_device_baco_enter(struct amdgpu_device *adev)
7051{
7052	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
7053
7054	if (!amdgpu_device_supports_baco(adev))
7055		return -ENOTSUPP;
7056
7057	if (ras && adev->ras_enabled &&
7058	    adev->nbio.funcs->enable_doorbell_interrupt)
7059		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
7060
7061	return amdgpu_dpm_baco_enter(adev);
7062}
7063
7064int amdgpu_device_baco_exit(struct amdgpu_device *adev)
7065{
7066	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
7067	int ret = 0;
7068
7069	if (!amdgpu_device_supports_baco(adev))
7070		return -ENOTSUPP;
7071
7072	ret = amdgpu_dpm_baco_exit(adev);
7073	if (ret)
7074		return ret;
7075
7076	if (ras && adev->ras_enabled &&
7077	    adev->nbio.funcs->enable_doorbell_interrupt)
7078		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
7079
7080	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
7081	    adev->nbio.funcs->clear_doorbell_interrupt)
7082		adev->nbio.funcs->clear_doorbell_interrupt(adev);
7083
7084	return 0;
7085}
7086
/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected. The reported @state is
 * recorded in adev->pci_channel_state before it is acted upon.
 *
 * Return: PCI_ERS_RESULT_CAN_RECOVER for pci_channel_io_normal,
 * PCI_ERS_RESULT_NEED_RESET for pci_channel_io_frozen (and for any state the
 * switch below does not handle), or PCI_ERS_RESULT_DISCONNECT for
 * pci_channel_io_perm_failure and for an XGMI hive member on which link
 * reset is not supported.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	/* Hive reference is released on scope exit via the __free() annotation */
	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
		amdgpu_get_xgmi_hive(adev);
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	dev_info(adev->dev, "PCI error: detected callback!!\n");

	/* Remembered so that amdgpu_pci_resume() can tell which path was taken */
	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		/* Fatal error, prepare for slot reset */
		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
		if (hive) {
			/* Hive devices should be able to support FW based
			 * link reset on other devices, if not return.
			 */
			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
				dev_warn(adev->dev,
					 "No support for XGMI hive yet...\n");
				return PCI_ERS_RESULT_DISCONNECT;
			}
			/* Set dpc status only if device is part of hive
			 * Non-hive devices should be able to recover after
			 * link reset.
			 */
			amdgpu_reset_set_dpc_status(adev, true);

			mutex_lock(&hive->hive_lock);
		}
		memset(&reset_context, 0, sizeof(reset_context));
		INIT_LIST_HEAD(&device_list);

		/*
		 * Build the list of affected devices, take the reset lock and
		 * halt activity. The reset lock is not dropped in this
		 * function.
		 */
		amdgpu_device_recovery_prepare(adev, &device_list, hive);
		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
					      hive, false);
		if (hive)
			mutex_unlock(&hive->hive_lock);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	/* Any channel state not listed above asks for a reset */
	return PCI_ERS_RESULT_NEED_RESET;
}
7151
7152/**
7153 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
7154 * @pdev: pointer to PCI device
7155 */
7156pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
7157{
7158	struct drm_device *dev = pci_get_drvdata(pdev);
7159	struct amdgpu_device *adev = drm_to_adev(dev);
7160
7161	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
7162
7163	/* TODO - dump whatever for debugging purposes */
7164
7165	/* This called only if amdgpu_pci_error_detected returns
7166	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
7167	 * works, no need to reset slot.
7168	 */
7169
7170	return PCI_ERS_RESULT_RECOVERED;
7171}
7172
7173/**
7174 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
7175 * @pdev: PCI device struct
7176 *
7177 * Description: This routine is called by the pci error recovery
7178 * code after the PCI slot has been reset, just before we
7179 * should resume normal operations.
7180 */
7181pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
7182{
7183	struct drm_device *dev = pci_get_drvdata(pdev);
7184	struct amdgpu_device *adev = drm_to_adev(dev);
7185	struct amdgpu_reset_context reset_context;
7186	struct amdgpu_device *tmp_adev;
7187	struct amdgpu_hive_info *hive;
7188	struct list_head device_list;
7189	struct pci_dev *link_dev;
7190	int r = 0, i, timeout;
7191	u32 memsize;
7192	u16 status;
7193
7194	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
7195
7196	memset(&reset_context, 0, sizeof(reset_context));
7197
7198	if (adev->pcie_reset_ctx.swus)
7199		link_dev = adev->pcie_reset_ctx.swus;
7200	else
7201		link_dev = adev->pdev;
7202	/* wait for asic to come out of reset, timeout = 10s */
7203	timeout = 10000;
7204	do {
7205		usleep_range(10000, 10500);
7206		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
7207		timeout -= 10;
7208	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
7209		 (status != PCI_VENDOR_ID_AMD));
7210
7211	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
7212		r = -ETIME;
7213		goto out;
7214	}
7215
7216	amdgpu_device_load_switch_state(adev);
7217	/* Restore PCI confspace */
7218	amdgpu_device_load_pci_state(pdev);
7219
7220	/* confirm  ASIC came out of reset */
7221	for (i = 0; i < adev->usec_timeout; i++) {
7222		memsize = amdgpu_asic_get_config_memsize(adev);
7223
7224		if (memsize != 0xffffffff)
7225			break;
7226		udelay(1);
7227	}
7228	if (memsize == 0xffffffff) {
7229		r = -ETIME;
7230		goto out;
7231	}
7232
7233	reset_context.method = AMD_RESET_METHOD_NONE;
7234	reset_context.reset_req_dev = adev;
7235	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
7236	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
7237	INIT_LIST_HEAD(&device_list);
7238
7239	hive = amdgpu_get_xgmi_hive(adev);
7240	if (hive) {
7241		mutex_lock(&hive->hive_lock);
7242		reset_context.hive = hive;
7243		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7244			tmp_adev->pcie_reset_ctx.in_link_reset = true;
7245			list_add_tail(&tmp_adev->reset_list, &device_list);
7246		}
7247	} else {
7248		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
7249		list_add_tail(&adev->reset_list, &device_list);
7250	}
7251
7252	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
7253out:
7254	if (!r) {
7255		if (amdgpu_device_cache_pci_state(adev->pdev))
7256			pci_restore_state(adev->pdev);
7257		dev_info(adev->dev, "PCIe error recovery succeeded\n");
7258	} else {
7259		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
7260		if (hive) {
7261			list_for_each_entry(tmp_adev, &device_list, reset_list)
7262				amdgpu_device_unset_mp1_state(tmp_adev);
7263		}
7264		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
7265	}
7266
7267	if (hive) {
7268		mutex_unlock(&hive->hive_lock);
7269		amdgpu_put_xgmi_hive(hive);
7270	}
7271
7272	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
7273}
7274
7275/**
7276 * amdgpu_pci_resume() - resume normal ops after PCI reset
7277 * @pdev: pointer to PCI device
7278 *
7279 * Called when the error recovery driver tells us that its
7280 * OK to resume normal operation.
7281 */
7282void amdgpu_pci_resume(struct pci_dev *pdev)
7283{
7284	struct drm_device *dev = pci_get_drvdata(pdev);
7285	struct amdgpu_device *adev = drm_to_adev(dev);
7286	struct list_head device_list;
7287	struct amdgpu_hive_info *hive = NULL;
7288	struct amdgpu_device *tmp_adev = NULL;
7289
7290	dev_info(adev->dev, "PCI error: resume callback!!\n");
7291
7292	/* Only continue execution for the case of pci_channel_io_frozen */
7293	if (adev->pci_channel_state != pci_channel_io_frozen)
7294		return;
7295
7296	INIT_LIST_HEAD(&device_list);
7297
7298	hive = amdgpu_get_xgmi_hive(adev);
7299	if (hive) {
7300		mutex_lock(&hive->hive_lock);
7301		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7302			tmp_adev->pcie_reset_ctx.in_link_reset = false;
7303			list_add_tail(&tmp_adev->reset_list, &device_list);
7304		}
7305	} else
7306		list_add_tail(&adev->reset_list, &device_list);
7307
7308	amdgpu_device_sched_resume(&device_list, NULL, NULL);
7309	amdgpu_device_gpu_resume(adev, &device_list, false);
7310	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
7311
7312	if (hive) {
7313		mutex_unlock(&hive->hive_lock);
7314		amdgpu_put_xgmi_hive(hive);
7315	}
7316}
7317
7318static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
7319{
7320	struct pci_dev *swus, *swds;
7321	int r;
7322
7323	swds = pci_upstream_bridge(adev->pdev);
7324	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
7325	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
7326		return;
7327	swus = pci_upstream_bridge(swds);
7328	if (!swus ||
7329	    (swus->vendor != PCI_VENDOR_ID_ATI &&
7330	     swus->vendor != PCI_VENDOR_ID_AMD) ||
7331	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
7332		return;
7333
7334	/* If already saved, return */
7335	if (adev->pcie_reset_ctx.swus)
7336		return;
7337	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
7338	r = pci_save_state(swds);
7339	if (r)
7340		return;
7341	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
7342
7343	r = pci_save_state(swus);
7344	if (r)
7345		return;
7346	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
7347
7348	adev->pcie_reset_ctx.swus = swus;
7349}
7350
7351static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
7352{
7353	struct pci_dev *pdev;
7354	int r;
7355
7356	if (!adev->pcie_reset_ctx.swds_pcistate ||
7357	    !adev->pcie_reset_ctx.swus_pcistate)
7358		return;
7359
7360	pdev = adev->pcie_reset_ctx.swus;
7361	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
7362	if (!r) {
7363		pci_restore_state(pdev);
7364	} else {
7365		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
7366		return;
7367	}
7368
7369	pdev = pci_upstream_bridge(adev->pdev);
7370	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
7371	if (!r)
7372		pci_restore_state(pdev);
7373	else
7374		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
7375}
7376
7377bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
7378{
7379	struct drm_device *dev = pci_get_drvdata(pdev);
7380	struct amdgpu_device *adev = drm_to_adev(dev);
7381	int r;
7382
7383	if (amdgpu_sriov_vf(adev))
7384		return false;
7385
7386	r = pci_save_state(pdev);
7387	if (!r) {
7388		kfree(adev->pci_state);
7389
7390		adev->pci_state = pci_store_saved_state(pdev);
7391
7392		if (!adev->pci_state) {
7393			dev_err(adev->dev, "Failed to store PCI saved state");
7394			return false;
7395		}
7396	} else {
7397		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
7398		return false;
7399	}
7400
7401	amdgpu_device_cache_switch_state(adev);
7402
7403	return true;
7404}
7405
7406bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
7407{
7408	struct drm_device *dev = pci_get_drvdata(pdev);
7409	struct amdgpu_device *adev = drm_to_adev(dev);
7410	int r;
7411
7412	if (!adev->pci_state)
7413		return false;
7414
7415	r = pci_load_saved_state(pdev, adev->pci_state);
7416
7417	if (!r) {
7418		pci_restore_state(pdev);
7419	} else {
7420		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
7421		return false;
7422	}
7423
7424	return true;
7425}
7426
7427void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
7428		struct amdgpu_ring *ring)
7429{
7430#ifdef CONFIG_X86_64
7431	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7432		return;
7433#endif
7434	if (adev->gmc.xgmi.connected_to_cpu)
7435		return;
7436
7437	if (ring && ring->funcs->emit_hdp_flush) {
7438		amdgpu_ring_emit_hdp_flush(ring);
7439		return;
7440	}
7441
7442	if (!ring && amdgpu_sriov_runtime(adev)) {
7443		if (!amdgpu_kiq_hdp_flush(adev))
7444			return;
7445	}
7446
7447	amdgpu_hdp_flush(adev, ring);
7448}
7449
7450void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
7451		struct amdgpu_ring *ring)
7452{
7453#ifdef CONFIG_X86_64
7454	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7455		return;
7456#endif
7457	if (adev->gmc.xgmi.connected_to_cpu)
7458		return;
7459
7460	amdgpu_hdp_invalidate(adev, ring);
7461}
7462
7463int amdgpu_in_reset(struct amdgpu_device *adev)
7464{
7465	return atomic_read(&adev->reset_domain->in_gpu_reset);
7466}
7467
/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It will help to maintain error context when an error occurred.
 * Compared to a simple hang, the system will stay stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
 *    clears all CPU mappings to device, disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 *    flush any in flight DMA operations
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	/* Step 1; amdgpu_xcp_dev_unplug() is called first, ahead of the DRM unplug */
	amdgpu_xcp_dev_unplug(adev);
	drm_dev_unplug(ddev);

	/* Step 2 */
	amdgpu_irq_disable_all(adev);

	/* Step 3 */
	amdgpu_fence_driver_hw_fini(adev);

	/* Step 4: must be set before the MMIO mappings disappear */
	adev->no_hw_access = true;

	/* Step 5 */
	amdgpu_device_unmap_mmio(adev);

	/* Step 6 */
	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}
7507
7508u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
7509				u32 reg)
7510{
7511	unsigned long flags, address, data;
7512	u32 r;
7513
7514	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7515	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7516
7517	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7518	WREG32(address, reg * 4);
7519	(void)RREG32(address);
7520	r = RREG32(data);
7521	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
7522	return r;
7523}
7524
7525void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
7526				u32 reg, u32 v)
7527{
7528	unsigned long flags, address, data;
7529
7530	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7531	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7532
7533	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7534	WREG32(address, reg * 4);
7535	(void)RREG32(address);
7536	WREG32(data, v);
7537	(void)RREG32(data);
7538	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
7539}
7540
7541/**
7542 * amdgpu_device_get_gang - return a reference to the current gang
7543 * @adev: amdgpu_device pointer
7544 *
7545 * Returns: A new reference to the current gang leader.
7546 */
7547struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
7548{
7549	struct dma_fence *fence;
7550
7551	rcu_read_lock();
7552	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
7553	rcu_read_unlock();
7554	return fence;
7555}
7556
/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang. The switch only happens once the current
 * gang leader fence has signaled.
 *
 * Returns: NULL if we switched to the new gang (or @gang already is the
 * current gang), or a reference to the current gang leader if it has not
 * signaled yet.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	/* Reference that adev->gang_submit will hold if the switch succeeds */
	dma_fence_get(gang);
	do {
		/* Drop the reference from the previous iteration, if any */
		dma_fence_put(old);
		old = amdgpu_device_get_gang(adev);
		if (old == gang)
			break;

		if (!dma_fence_is_signaled(old)) {
			/* Current gang still busy: undo our get and hand it back */
			dma_fence_put(gang);
			return old;
		}

		/* Retry when adev->gang_submit changed since it was sampled */
	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	/*
	 * Drop it once for the exchanged reference in adev and once for the
	 * thread local reference acquired in amdgpu_device_get_gang().
	 */
	dma_fence_put(old);
	dma_fence_put(old);
	return NULL;
}
7594
/**
 * amdgpu_device_enforce_isolation - enforce HW isolation
 * @adev: the amdgpu device pointer
 * @ring: the HW ring the job is supposed to run on
 * @job: the job which is about to be pushed to the HW ring
 *
 * Makes sure that only one client at a time can use the GFX block.
 * The isolation state is tracked per ring->xcp_id and is protected by
 * adev->enforce_isolation_mutex.
 *
 * Returns: The dependency to wait on before the job can be pushed to the HW.
 * The function is called multiple times until NULL is returned.
 */
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
						  struct amdgpu_ring *ring,
						  struct amdgpu_job *job)
{
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	struct drm_sched_fence *f = job->base.s_fence;
	struct dma_fence *dep;
	void *owner;
	int r;

	/*
	 * For now enforce isolation only for the GFX block since we only need
	 * the cleaner shader on those rings.
	 */
	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return NULL;

	/*
	 * All submissions where enforce isolation is false are handled as if
	 * they come from a single client. Use ~0l as the owner to distinguish
	 * it from kernel submissions where the owner is NULL.
	 */
	owner = job->enforce_isolation ? f->owner : (void *)~0l;

	mutex_lock(&adev->enforce_isolation_mutex);

	/*
	 * The "spearhead" submission is the first one which changes the
	 * ownership to its client. We always need to wait for it to be
	 * pushed to the HW before proceeding with anything.
	 */
	if (&f->scheduled != isolation->spearhead &&
	    !dma_fence_is_signaled(isolation->spearhead)) {
		dep = isolation->spearhead;
		goto out_grab_ref;
	}

	if (isolation->owner != owner) {

		/*
		 * Wait for any gang to be assembled before switching to a
		 * different owner or otherwise we could deadlock the
		 * submissions.
		 */
		if (!job->gang_submit) {
			/* dep already carries a reference from get_gang() */
			dep = amdgpu_device_get_gang(adev);
			if (!dma_fence_is_signaled(dep))
				goto out_return_dep;
			dma_fence_put(dep);
		}

		/* This job becomes the new spearhead and its client the owner */
		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(&f->scheduled);
		amdgpu_sync_move(&isolation->active, &isolation->prev);
		trace_amdgpu_isolation(isolation->owner, owner);
		isolation->owner = owner;
	}

	/*
	 * Specifying the ring here helps to pipeline submissions even when
	 * isolation is enabled. If that is not desired for testing NULL can be
	 * used instead of the ring to enforce a CPU round trip while switching
	 * between clients.
	 */
	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
	if (r)
		dev_warn(adev->dev, "OOM tracking isolation\n");

out_grab_ref:
	/* dep is borrowed on this path; take a reference for the caller */
	dma_fence_get(dep);
out_return_dep:
	mutex_unlock(&adev->enforce_isolation_mutex);
	return dep;
}
7681
7682bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
7683{
7684	switch (adev->asic_type) {
7685#ifdef CONFIG_DRM_AMDGPU_SI
7686	case CHIP_HAINAN:
7687#endif
7688	case CHIP_TOPAZ:
7689		/* chips with no display hardware */
7690		return false;
7691#ifdef CONFIG_DRM_AMDGPU_SI
7692	case CHIP_TAHITI:
7693	case CHIP_PITCAIRN:
7694	case CHIP_VERDE:
7695	case CHIP_OLAND:
7696#endif
7697#ifdef CONFIG_DRM_AMDGPU_CIK
7698	case CHIP_BONAIRE:
7699	case CHIP_HAWAII:
7700	case CHIP_KAVERI:
7701	case CHIP_KABINI:
7702	case CHIP_MULLINS:
7703#endif
7704	case CHIP_TONGA:
7705	case CHIP_FIJI:
7706	case CHIP_POLARIS10:
7707	case CHIP_POLARIS11:
7708	case CHIP_POLARIS12:
7709	case CHIP_VEGAM:
7710	case CHIP_CARRIZO:
7711	case CHIP_STONEY:
7712		/* chips with display hardware */
7713		return true;
7714	default:
7715		/* IP discovery */
7716		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
7717		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
7718			return false;
7719		return true;
7720	}
7721}
7722
7723uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
7724		uint32_t inst, uint32_t reg_addr, char reg_name[],
7725		uint32_t expected_value, uint32_t mask)
7726{
7727	uint32_t ret = 0;
7728	uint32_t old_ = 0;
7729	uint32_t tmp_ = RREG32(reg_addr);
7730	uint32_t loop = adev->usec_timeout;
7731
7732	while ((tmp_ & (mask)) != (expected_value)) {
7733		if (old_ != tmp_) {
7734			loop = adev->usec_timeout;
7735			old_ = tmp_;
7736		} else
7737			udelay(1);
7738		tmp_ = RREG32(reg_addr);
7739		loop--;
7740		if (!loop) {
7741			dev_warn(
7742				adev->dev,
7743				"Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
7744				inst, reg_name, (uint32_t)expected_value,
7745				(uint32_t)(tmp_ & (mask)));
7746			ret = -ETIMEDOUT;
7747			break;
7748		}
7749	}
7750	return ret;
7751}
7752
7753ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
7754{
7755	ssize_t size = 0;
7756
7757	if (!ring || !ring->adev)
7758		return size;
7759
7760	if (amdgpu_device_should_recover_gpu(ring->adev))
7761		size |= AMDGPU_RESET_TYPE_FULL;
7762
7763	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
7764	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
7765		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
7766
7767	return size;
7768}
7769
7770ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
7771{
7772	ssize_t size = 0;
7773
7774	if (supported_reset == 0) {
7775		size += sysfs_emit_at(buf, size, "unsupported");
7776		size += sysfs_emit_at(buf, size, "\n");
7777		return size;
7778
7779	}
7780
7781	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
7782		size += sysfs_emit_at(buf, size, "soft ");
7783
7784	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
7785		size += sysfs_emit_at(buf, size, "queue ");
7786
7787	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
7788		size += sysfs_emit_at(buf, size, "pipe ");
7789
7790	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
7791		size += sysfs_emit_at(buf, size, "full ");
7792
7793	size += sysfs_emit_at(buf, size, "\n");
7794	return size;
7795}
7796
7797void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
7798			   enum amdgpu_uid_type type, uint8_t inst,
7799			   uint64_t uid)
7800{
7801	if (!uid_info)
7802		return;
7803
7804	if (type >= AMDGPU_UID_TYPE_MAX) {
7805		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7806			     type);
7807		return;
7808	}
7809
7810	if (inst >= AMDGPU_UID_INST_MAX) {
7811		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7812			     inst);
7813		return;
7814	}
7815
7816	if (uid_info->uid[type][inst] != 0) {
7817		dev_warn_once(
7818			uid_info->adev->dev,
7819			"Overwriting existing UID %llu for type %d instance %d\n",
7820			uid_info->uid[type][inst], type, inst);
7821	}
7822
7823	uid_info->uid[type][inst] = uid;
7824}
7825
7826u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
7827			  enum amdgpu_uid_type type, uint8_t inst)
7828{
7829	if (!uid_info)
7830		return 0;
7831
7832	if (type >= AMDGPU_UID_TYPE_MAX) {
7833		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7834			     type);
7835		return 0;
7836	}
7837
7838	if (inst >= AMDGPU_UID_INST_MAX) {
7839		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7840			     inst);
7841		return 0;
7842	}
7843
7844	return uid_info->uid[type][inst];
7845}