1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28
29#include <linux/aperture.h>
30#include <linux/power_supply.h>
31#include <linux/kthread.h>
32#include <linux/module.h>
33#include <linux/console.h>
34#include <linux/slab.h>
35#include <linux/iommu.h>
36#include <linux/pci.h>
37#include <linux/pci-p2pdma.h>
38#include <linux/apple-gmux.h>
39
40#include <drm/drm_atomic_helper.h>
41#include <drm/drm_client_event.h>
42#include <drm/drm_crtc_helper.h>
43#include <drm/drm_probe_helper.h>
44#include <drm/amdgpu_drm.h>
45#include <linux/device.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
50#include "amdgpu_trace.h"
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
54#include "amdgpu_atomfirmware.h"
55#include "amd_pcie.h"
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
62#include "vi.h"
63#include "soc15.h"
64#include "nv.h"
65#include "bif/bif_4_1_d.h"
66#include <linux/firmware.h>
67#include "amdgpu_vf_error.h"
68
69#include "amdgpu_amdkfd.h"
70#include "amdgpu_pm.h"
71
72#include "amdgpu_xgmi.h"
73#include "amdgpu_ras.h"
74#include "amdgpu_pmu.h"
75#include "amdgpu_fru_eeprom.h"
76#include "amdgpu_reset.h"
77#include "amdgpu_virt.h"
78#include "amdgpu_dev_coredump.h"
79
80#include <linux/suspend.h>
81#include <drm/task_barrier.h>
82#include <linux/pm_runtime.h>
83
84#include <drm/drm_drv.h>
85
86#if IS_ENABLED(CONFIG_X86)
87#include <asm/intel-family.h>
88#endif
89
90MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
91MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
92MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
93MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
94MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
95MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
96MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
97
98#define AMDGPU_RESUME_MS 2000
99#define AMDGPU_MAX_RETRY_LIMIT 2
100#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
101#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
102#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
103#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
104
105static const struct drm_driver amdgpu_kms_driver;
106
107const char *amdgpu_asic_name[] = {
108 "TAHITI",
109 "PITCAIRN",
110 "VERDE",
111 "OLAND",
112 "HAINAN",
113 "BONAIRE",
114 "KAVERI",
115 "KABINI",
116 "HAWAII",
117 "MULLINS",
118 "TOPAZ",
119 "TONGA",
120 "FIJI",
121 "CARRIZO",
122 "STONEY",
123 "POLARIS10",
124 "POLARIS11",
125 "POLARIS12",
126 "VEGAM",
127 "VEGA10",
128 "VEGA12",
129 "VEGA20",
130 "RAVEN",
131 "ARCTURUS",
132 "RENOIR",
133 "ALDEBARAN",
134 "NAVI10",
135 "CYAN_SKILLFISH",
136 "NAVI14",
137 "NAVI12",
138 "SIENNA_CICHLID",
139 "NAVY_FLOUNDER",
140 "VANGOGH",
141 "DIMGREY_CAVEFISH",
142 "BEIGE_GOBY",
143 "YELLOW_CARP",
144 "IP DISCOVERY",
145 "LAST",
146};
147
148#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
149/*
150 * Default init level where all blocks are expected to be initialized. This is
151 * the level of initialization expected by default and also after a full reset
152 * of the device.
153 */
154struct amdgpu_init_level amdgpu_init_default = {
155 .level = AMDGPU_INIT_LEVEL_DEFAULT,
156 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
157};
158
159struct amdgpu_init_level amdgpu_init_recovery = {
160 .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
161 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
162};
163
164/*
165 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
166 * is used for cases like reset on initialization where the entire hive needs to
167 * be reset before first use.
168 */
169struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
170 .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
171 .hwini_ip_block_mask =
172 BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
173 BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
174 BIT(AMD_IP_BLOCK_TYPE_PSP)
175};
176
177static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
178 enum amd_ip_block_type block)
179{
180 return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
181}
182
183void amdgpu_set_init_level(struct amdgpu_device *adev,
184 enum amdgpu_init_lvl_id lvl)
185{
186 switch (lvl) {
187 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
188 adev->init_lvl = &amdgpu_init_minimal_xgmi;
189 break;
190 case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
191 adev->init_lvl = &amdgpu_init_recovery;
192 break;
193 case AMDGPU_INIT_LEVEL_DEFAULT:
194 fallthrough;
195 default:
196 adev->init_lvl = &amdgpu_init_default;
197 break;
198 }
199}
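/*
 * Illustrative sketch (not driver code) of how the selected level gates the
 * hardware init of individual IP blocks; the blocks picked here are arbitrary:
 *
 *   amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
 *
 *   amdgpu_ip_member_of_hwini(adev, AMD_IP_BLOCK_TYPE_PSP)  -> true
 *   amdgpu_ip_member_of_hwini(adev, AMD_IP_BLOCK_TYPE_GFX)  -> false
 *
 * PSP is part of the minimal XGMI mask, so its hardware init still runs at
 * this level; GFX is not, so its hardware init is skipped until the default
 * level is restored.
 */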
200
201static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
202
203/**
204 * DOC: pcie_replay_count
205 *
206 * The amdgpu driver provides a sysfs API for reporting the total number
207 * of PCIe replays (NAKs).
208 * The file pcie_replay_count is used for this and returns the total
209 * number of replays as a sum of the NAKs generated and NAKs received.
210 */
211
212static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
213 struct device_attribute *attr, char *buf)
214{
215 struct drm_device *ddev = dev_get_drvdata(dev);
216 struct amdgpu_device *adev = drm_to_adev(ddev);
217 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
218
219 return sysfs_emit(buf, "%llu\n", cnt);
220}
221
222static DEVICE_ATTR(pcie_replay_count, 0444,
223 amdgpu_device_get_pcie_replay_count, NULL);
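/*
 * Userspace usage sketch: the attribute shows up in the device's sysfs
 * directory, e.g. /sys/class/drm/card0/device/pcie_replay_count (the card
 * index and therefore the exact path are assumptions for illustration only):
 *
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *           unsigned long long replays = 0;
 *           FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *           if (!f)
 *                   return 1;
 *           if (fscanf(f, "%llu", &replays) == 1)
 *                   printf("PCIe replays: %llu\n", replays);
 *           fclose(f);
 *           return 0;
 *   }
 */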
224
225static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
226 struct bin_attribute *attr, char *buf,
227 loff_t ppos, size_t count)
228{
229 struct device *dev = kobj_to_dev(kobj);
230 struct drm_device *ddev = dev_get_drvdata(dev);
231 struct amdgpu_device *adev = drm_to_adev(ddev);
232 ssize_t bytes_read;
233
234 switch (ppos) {
235 case AMDGPU_SYS_REG_STATE_XGMI:
236 bytes_read = amdgpu_asic_get_reg_state(
237 adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
238 break;
239 case AMDGPU_SYS_REG_STATE_WAFL:
240 bytes_read = amdgpu_asic_get_reg_state(
241 adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
242 break;
243 case AMDGPU_SYS_REG_STATE_PCIE:
244 bytes_read = amdgpu_asic_get_reg_state(
245 adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
246 break;
247 case AMDGPU_SYS_REG_STATE_USR:
248 bytes_read = amdgpu_asic_get_reg_state(
249 adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
250 break;
251 case AMDGPU_SYS_REG_STATE_USR_1:
252 bytes_read = amdgpu_asic_get_reg_state(
253 adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
254 break;
255 default:
256 return -EINVAL;
257 }
258
259 return bytes_read;
260}
261
262BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
263 AMDGPU_SYS_REG_STATE_END);
264
265int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
266{
267 int ret;
268
269 if (!amdgpu_asic_get_reg_state_supported(adev))
270 return 0;
271
272 ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
273
274 return ret;
275}
276
277void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
278{
279 if (!amdgpu_asic_get_reg_state_supported(adev))
280 return;
281 sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
282}
283
284int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
285{
286 int r;
287
288 if (ip_block->version->funcs->suspend) {
289 r = ip_block->version->funcs->suspend(ip_block);
290 if (r) {
291 dev_err(ip_block->adev->dev,
292 "suspend of IP block <%s> failed %d\n",
293 ip_block->version->funcs->name, r);
294 return r;
295 }
296 }
297
298 ip_block->status.hw = false;
299 return 0;
300}
301
302int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
303{
304 int r;
305
306 if (ip_block->version->funcs->resume) {
307 r = ip_block->version->funcs->resume(ip_block);
308 if (r) {
309 dev_err(ip_block->adev->dev,
310 "resume of IP block <%s> failed %d\n",
311 ip_block->version->funcs->name, r);
312 return r;
313 }
314 }
315
316 ip_block->status.hw = true;
317 return 0;
318}
319
320/**
321 * DOC: board_info
322 *
323 * The amdgpu driver provides a sysfs API for giving board related information.
324 * It provides the form factor information in the format
325 *
326 * type : form factor
327 *
328 * Possible form factor values
329 *
330 * - "cem" - PCIE CEM card
331 * - "oam" - Open Compute Accelerator Module
332 * - "unknown" - Not known
333 *
334 */
335
336static ssize_t amdgpu_device_get_board_info(struct device *dev,
337 struct device_attribute *attr,
338 char *buf)
339{
340 struct drm_device *ddev = dev_get_drvdata(dev);
341 struct amdgpu_device *adev = drm_to_adev(ddev);
342 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
343 const char *pkg;
344
345 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
346 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
347
348 switch (pkg_type) {
349 case AMDGPU_PKG_TYPE_CEM:
350 pkg = "cem";
351 break;
352 case AMDGPU_PKG_TYPE_OAM:
353 pkg = "oam";
354 break;
355 default:
356 pkg = "unknown";
357 break;
358 }
359
360 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
361}
362
363static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
364
365static struct attribute *amdgpu_board_attrs[] = {
366 &dev_attr_board_info.attr,
367 NULL,
368};
369
370static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
371 struct attribute *attr, int n)
372{
373 struct device *dev = kobj_to_dev(kobj);
374 struct drm_device *ddev = dev_get_drvdata(dev);
375 struct amdgpu_device *adev = drm_to_adev(ddev);
376
377 if (adev->flags & AMD_IS_APU)
378 return 0;
379
380 return attr->mode;
381}
382
383static const struct attribute_group amdgpu_board_attrs_group = {
384 .attrs = amdgpu_board_attrs,
385 .is_visible = amdgpu_board_attrs_is_visible
386};
387
388static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
389
390
391/**
392 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
393 *
394 * @dev: drm_device pointer
395 *
396 * Returns true if the device is a dGPU with ATPX power control,
397 * otherwise return false.
398 */
399bool amdgpu_device_supports_px(struct drm_device *dev)
400{
401 struct amdgpu_device *adev = drm_to_adev(dev);
402
403 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
404 return true;
405 return false;
406}
407
408/**
409 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
410 *
411 * @dev: drm_device pointer
412 *
413 * Returns true if the device is a dGPU with ACPI power control,
414 * otherwise return false.
415 */
416bool amdgpu_device_supports_boco(struct drm_device *dev)
417{
418 struct amdgpu_device *adev = drm_to_adev(dev);
419
420 if (adev->has_pr3 ||
421 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
422 return true;
423 return false;
424}
425
426/**
427 * amdgpu_device_supports_baco - Does the device support BACO
428 *
429 * @dev: drm_device pointer
430 *
431 * Return:
432 * 1 if the device supports BACO;
433 * 3 if the device supports MACO (only works if BACO is supported);
434 * otherwise return 0.
435 */
436int amdgpu_device_supports_baco(struct drm_device *dev)
437{
438 struct amdgpu_device *adev = drm_to_adev(dev);
439
440 return amdgpu_asic_supports_baco(adev);
441}
442
443void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
444{
445 struct drm_device *dev;
446 int bamaco_support;
447
448 dev = adev_to_drm(adev);
449
450 adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
451 bamaco_support = amdgpu_device_supports_baco(dev);
452
453 switch (amdgpu_runtime_pm) {
454 case 2:
455 if (bamaco_support & MACO_SUPPORT) {
456 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
457 dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
458 } else if (bamaco_support == BACO_SUPPORT) {
459 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
460 dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
461 }
462 break;
463 case 1:
464 if (bamaco_support & BACO_SUPPORT) {
465 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
466 dev_info(adev->dev, "Forcing BACO for runtime pm\n");
467 }
468 break;
469 case -1:
470 case -2:
471 if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
472 adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
473 dev_info(adev->dev, "Using ATPX for runtime pm\n");
474 } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
475 adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
476 dev_info(adev->dev, "Using BOCO for runtime pm\n");
477 } else {
478 if (!bamaco_support)
479 goto no_runtime_pm;
480
481 switch (adev->asic_type) {
482 case CHIP_VEGA20:
483 case CHIP_ARCTURUS:
484 /* BACO is not supported on vega20 and arcturus */
485 break;
486 case CHIP_VEGA10:
487 /* enable BACO as runpm mode if noretry=0 */
488 if (!adev->gmc.noretry)
489 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
490 break;
491 default:
492 /* enable BACO as runpm mode on CI+ */
493 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
494 break;
495 }
496
497 if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
498 if (bamaco_support & MACO_SUPPORT) {
499 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
500 dev_info(adev->dev, "Using BAMACO for runtime pm\n");
501 } else {
502 dev_info(adev->dev, "Using BACO for runtime pm\n");
503 }
504 }
505 }
506 break;
507 case 0:
508 dev_info(adev->dev, "runtime pm is manually disabled\n");
509 break;
510 default:
511 break;
512 }
513
514no_runtime_pm:
515 if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
516 dev_info(adev->dev, "Runtime PM not available\n");
517}
518/**
519 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
520 * Smart Shift support
521 *
522 * @dev: drm_device pointer
523 *
524 * Returns true if the device is a dGPU with Smart Shift support,
525 * otherwise returns false.
526 */
527bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
528{
529 return (amdgpu_device_supports_boco(dev) &&
530 amdgpu_acpi_is_power_shift_control_supported());
531}
532
533/*
534 * VRAM access helper functions
535 */
536
537/**
538 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
539 *
540 * @adev: amdgpu_device pointer
541 * @pos: offset of the buffer in vram
542 * @buf: virtual address of the buffer in system memory
543 * @size: read/write size; the buffer at @buf must be at least @size bytes
544 * @write: true - write to vram, otherwise - read from vram
545 */
546void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
547 void *buf, size_t size, bool write)
548{
549 unsigned long flags;
550 uint32_t hi = ~0, tmp = 0;
551 uint32_t *data = buf;
552 uint64_t last;
553 int idx;
554
555 if (!drm_dev_enter(adev_to_drm(adev), &idx))
556 return;
557
558 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
559
560 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
561 for (last = pos + size; pos < last; pos += 4) {
562 tmp = pos >> 31;
563
564 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
565 if (tmp != hi) {
566 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
567 hi = tmp;
568 }
569 if (write)
570 WREG32_NO_KIQ(mmMM_DATA, *data++);
571 else
572 *data++ = RREG32_NO_KIQ(mmMM_DATA);
573 }
574
575 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
576 drm_dev_exit(idx);
577}
578
579/**
580 * amdgpu_device_aper_access - access vram by the vram aperture
581 *
582 * @adev: amdgpu_device pointer
583 * @pos: offset of the buffer in vram
584 * @buf: virtual address of the buffer in system memory
585 * @size: read/write size; the buffer at @buf must be at least @size bytes
586 * @write: true - write to vram, otherwise - read from vram
587 *
588 * Returns the number of bytes transferred.
589 */
590size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
591 void *buf, size_t size, bool write)
592{
593#ifdef CONFIG_64BIT
594 void __iomem *addr;
595 size_t count = 0;
596 uint64_t last;
597
598 if (!adev->mman.aper_base_kaddr)
599 return 0;
600
601 last = min(pos + size, adev->gmc.visible_vram_size);
602 if (last > pos) {
603 addr = adev->mman.aper_base_kaddr + pos;
604 count = last - pos;
605
606 if (write) {
607 memcpy_toio(addr, buf, count);
608 /* Make sure HDP write cache flush happens without any reordering
609 * after the system memory contents are sent to the device over PCIe
610 */
611 mb();
612 amdgpu_device_flush_hdp(adev, NULL);
613 } else {
614 amdgpu_device_invalidate_hdp(adev, NULL);
615 /* Make sure HDP read cache is invalidated before issuing a read
616 * to the PCIe device
617 */
618 mb();
619 memcpy_fromio(buf, addr, count);
620 }
621
622 }
623
624 return count;
625#else
626 return 0;
627#endif
628}
629
630/**
631 * amdgpu_device_vram_access - read/write a buffer in vram
632 *
633 * @adev: amdgpu_device pointer
634 * @pos: offset of the buffer in vram
635 * @buf: virtual address of the buffer in system memory
636 * @size: read/write size; the buffer at @buf must be at least @size bytes
637 * @write: true - write to vram, otherwise - read from vram
638 */
639void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
640 void *buf, size_t size, bool write)
641{
642 size_t count;
643
644 /* try using the vram aperture to access vram first */
645 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
646 size -= count;
647 if (size) {
648 /* use MM access for the rest of vram */
649 pos += count;
650 buf += count;
651 amdgpu_device_mm_access(adev, pos, buf, size, write);
652 }
653}
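/*
 * Usage sketch: read a small, dword-aligned region of VRAM into a stack
 * buffer through the helper above. The offset and size are arbitrary
 * example values:
 *
 *   u32 hdr[4];
 *
 *   amdgpu_device_vram_access(adev, 0x1000, hdr, sizeof(hdr), false);
 */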
654
655/*
656 * register access helper functions.
657 */
658
659/* Check if hw access should be skipped because of hotplug or device error */
660bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
661{
662 if (adev->no_hw_access)
663 return true;
664
665#ifdef CONFIG_LOCKDEP
666 /*
667 * This is a bit complicated to understand, so worth a comment. What we assert
668 * here is that the GPU reset is not running on another thread in parallel.
669 *
670 * For this we trylock the read side of the reset semaphore, if that succeeds
671 * we know that the reset is not running in parallel.
672 *
673 * If the trylock fails we assert that we are either already holding the read
674 * side of the lock or are the reset thread itself and hold the write side of
675 * the lock.
676 */
677 if (in_task()) {
678 if (down_read_trylock(&adev->reset_domain->sem))
679 up_read(&adev->reset_domain->sem);
680 else
681 lockdep_assert_held(&adev->reset_domain->sem);
682 }
683#endif
684 return false;
685}
686
687/**
688 * amdgpu_device_rreg - read a memory mapped IO or indirect register
689 *
690 * @adev: amdgpu_device pointer
691 * @reg: dword aligned register offset
692 * @acc_flags: access flags which require special behavior
693 *
694 * Returns the 32 bit value from the offset specified.
695 */
696uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
697 uint32_t reg, uint32_t acc_flags)
698{
699 uint32_t ret;
700
701 if (amdgpu_device_skip_hw_access(adev))
702 return 0;
703
704 if ((reg * 4) < adev->rmmio_size) {
705 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
706 amdgpu_sriov_runtime(adev) &&
707 down_read_trylock(&adev->reset_domain->sem)) {
708 ret = amdgpu_kiq_rreg(adev, reg, 0);
709 up_read(&adev->reset_domain->sem);
710 } else {
711 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
712 }
713 } else {
714 ret = adev->pcie_rreg(adev, reg * 4);
715 }
716
717 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
718
719 return ret;
720}
721
722/*
723 * MMIO register read with byte offset helper functions
724 * @offset: byte offset from MMIO start
725 */
726
727/**
728 * amdgpu_mm_rreg8 - read a memory mapped IO register
729 *
730 * @adev: amdgpu_device pointer
731 * @offset: byte aligned register offset
732 *
733 * Returns the 8 bit value from the offset specified.
734 */
735uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
736{
737 if (amdgpu_device_skip_hw_access(adev))
738 return 0;
739
740 if (offset < adev->rmmio_size)
741 return (readb(adev->rmmio + offset));
742 BUG();
743}
744
745
746/**
747 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
748 *
749 * @adev: amdgpu_device pointer
750 * @reg: dword aligned register offset
751 * @acc_flags: access flags which require special behavior
752 * @xcc_id: xcc accelerated compute core id
753 *
754 * Returns the 32 bit value from the offset specified.
755 */
756uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
757 uint32_t reg, uint32_t acc_flags,
758 uint32_t xcc_id)
759{
760 uint32_t ret, rlcg_flag;
761
762 if (amdgpu_device_skip_hw_access(adev))
763 return 0;
764
765 if ((reg * 4) < adev->rmmio_size) {
766 if (amdgpu_sriov_vf(adev) &&
767 !amdgpu_sriov_runtime(adev) &&
768 adev->gfx.rlc.rlcg_reg_access_supported &&
769 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
770 GC_HWIP, false,
771 &rlcg_flag)) {
772 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
773 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
774 amdgpu_sriov_runtime(adev) &&
775 down_read_trylock(&adev->reset_domain->sem)) {
776 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
777 up_read(&adev->reset_domain->sem);
778 } else {
779 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
780 }
781 } else {
782 ret = adev->pcie_rreg(adev, reg * 4);
783 }
784
785 return ret;
786}
787
788/*
789 * MMIO register write with byte offset helper functions
790 * @offset: byte offset from MMIO start
791 * @value: the value to be written to the register
792 */
793
794/**
795 * amdgpu_mm_wreg8 - write a memory mapped IO register
796 *
797 * @adev: amdgpu_device pointer
798 * @offset: byte aligned register offset
799 * @value: 8 bit value to write
800 *
801 * Writes the value specified to the offset specified.
802 */
803void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
804{
805 if (amdgpu_device_skip_hw_access(adev))
806 return;
807
808 if (offset < adev->rmmio_size)
809 writeb(value, adev->rmmio + offset);
810 else
811 BUG();
812}
813
814/**
815 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
816 *
817 * @adev: amdgpu_device pointer
818 * @reg: dword aligned register offset
819 * @v: 32 bit value to write to the register
820 * @acc_flags: access flags which require special behavior
821 *
822 * Writes the value specified to the offset specified.
823 */
824void amdgpu_device_wreg(struct amdgpu_device *adev,
825 uint32_t reg, uint32_t v,
826 uint32_t acc_flags)
827{
828 if (amdgpu_device_skip_hw_access(adev))
829 return;
830
831 if ((reg * 4) < adev->rmmio_size) {
832 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
833 amdgpu_sriov_runtime(adev) &&
834 down_read_trylock(&adev->reset_domain->sem)) {
835 amdgpu_kiq_wreg(adev, reg, v, 0);
836 up_read(&adev->reset_domain->sem);
837 } else {
838 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
839 }
840 } else {
841 adev->pcie_wreg(adev, reg * 4, v);
842 }
843
844 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
845}
846
847/**
848 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
849 *
850 * @adev: amdgpu_device pointer
851 * @reg: mmio/rlc register
852 * @v: value to write
853 * @xcc_id: xcc accelerated compute core id
854 *
855 * This function is invoked only for debugfs register access.
856 */
857void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
858 uint32_t reg, uint32_t v,
859 uint32_t xcc_id)
860{
861 if (amdgpu_device_skip_hw_access(adev))
862 return;
863
864 if (amdgpu_sriov_fullaccess(adev) &&
865 adev->gfx.rlc.funcs &&
866 adev->gfx.rlc.funcs->is_rlcg_access_range) {
867 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
868 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
869 } else if ((reg * 4) >= adev->rmmio_size) {
870 adev->pcie_wreg(adev, reg * 4, v);
871 } else {
872 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
873 }
874}
875
876/**
877 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
878 *
879 * @adev: amdgpu_device pointer
880 * @reg: dword aligned register offset
881 * @v: 32 bit value to write to the register
882 * @acc_flags: access flags which require special behavior
883 * @xcc_id: xcc accelerated compute core id
884 *
885 * Writes the value specified to the offset specified.
886 */
887void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
888 uint32_t reg, uint32_t v,
889 uint32_t acc_flags, uint32_t xcc_id)
890{
891 uint32_t rlcg_flag;
892
893 if (amdgpu_device_skip_hw_access(adev))
894 return;
895
896 if ((reg * 4) < adev->rmmio_size) {
897 if (amdgpu_sriov_vf(adev) &&
898 !amdgpu_sriov_runtime(adev) &&
899 adev->gfx.rlc.rlcg_reg_access_supported &&
900 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
901 GC_HWIP, true,
902 &rlcg_flag)) {
903 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
904 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
905 amdgpu_sriov_runtime(adev) &&
906 down_read_trylock(&adev->reset_domain->sem)) {
907 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
908 up_read(&adev->reset_domain->sem);
909 } else {
910 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
911 }
912 } else {
913 adev->pcie_wreg(adev, reg * 4, v);
914 }
915}
916
917/**
918 * amdgpu_device_indirect_rreg - read an indirect register
919 *
920 * @adev: amdgpu_device pointer
921 * @reg_addr: indirect register address to read from
922 *
923 * Returns the value of indirect register @reg_addr
924 */
925u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
926 u32 reg_addr)
927{
928 unsigned long flags, pcie_index, pcie_data;
929 void __iomem *pcie_index_offset;
930 void __iomem *pcie_data_offset;
931 u32 r;
932
933 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
934 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
935
936 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
937 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
938 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
939
940 writel(reg_addr, pcie_index_offset);
941 readl(pcie_index_offset);
942 r = readl(pcie_data_offset);
943 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
944
945 return r;
946}
947
948u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
949 u64 reg_addr)
950{
951 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
952 u32 r;
953 void __iomem *pcie_index_offset;
954 void __iomem *pcie_index_hi_offset;
955 void __iomem *pcie_data_offset;
956
957 if (unlikely(!adev->nbio.funcs)) {
958 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
959 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
960 } else {
961 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
962 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
963 }
964
965 if (reg_addr >> 32) {
966 if (unlikely(!adev->nbio.funcs))
967 pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
968 else
969 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
970 } else {
971 pcie_index_hi = 0;
972 }
973
974 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
975 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
976 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
977 if (pcie_index_hi != 0)
978 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
979 pcie_index_hi * 4;
980
981 writel(reg_addr, pcie_index_offset);
982 readl(pcie_index_offset);
983 if (pcie_index_hi != 0) {
984 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
985 readl(pcie_index_hi_offset);
986 }
987 r = readl(pcie_data_offset);
988
989 /* clear the high bits */
990 if (pcie_index_hi != 0) {
991 writel(0, pcie_index_hi_offset);
992 readl(pcie_index_hi_offset);
993 }
994
995 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
996
997 return r;
998}
999
1000/**
1001 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
1002 *
1003 * @adev: amdgpu_device pointer
1004 * @reg_addr: indirect register address to read from
1005 *
1006 * Returns the value of indirect register @reg_addr
1007 */
1008u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1009 u32 reg_addr)
1010{
1011 unsigned long flags, pcie_index, pcie_data;
1012 void __iomem *pcie_index_offset;
1013 void __iomem *pcie_data_offset;
1014 u64 r;
1015
1016 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1017 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1018
1019 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1020 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1021 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1022
1023 /* read low 32 bits */
1024 writel(reg_addr, pcie_index_offset);
1025 readl(pcie_index_offset);
1026 r = readl(pcie_data_offset);
1027 /* read high 32 bits */
1028 writel(reg_addr + 4, pcie_index_offset);
1029 readl(pcie_index_offset);
1030 r |= ((u64)readl(pcie_data_offset) << 32);
1031 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1032
1033 return r;
1034}
1035
1036u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1037 u64 reg_addr)
1038{
1039 unsigned long flags, pcie_index, pcie_data;
1040 unsigned long pcie_index_hi = 0;
1041 void __iomem *pcie_index_offset;
1042 void __iomem *pcie_index_hi_offset;
1043 void __iomem *pcie_data_offset;
1044 u64 r;
1045
1046 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1047 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1048 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1049 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1050
1051 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1052 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1053 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1054 if (pcie_index_hi != 0)
1055 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1056 pcie_index_hi * 4;
1057
1058 /* read low 32 bits */
1059 writel(reg_addr, pcie_index_offset);
1060 readl(pcie_index_offset);
1061 if (pcie_index_hi != 0) {
1062 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1063 readl(pcie_index_hi_offset);
1064 }
1065 r = readl(pcie_data_offset);
1066 /* read high 32 bits */
1067 writel(reg_addr + 4, pcie_index_offset);
1068 readl(pcie_index_offset);
1069 if (pcie_index_hi != 0) {
1070 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1071 readl(pcie_index_hi_offset);
1072 }
1073 r |= ((u64)readl(pcie_data_offset) << 32);
1074
1075 /* clear the high bits */
1076 if (pcie_index_hi != 0) {
1077 writel(0, pcie_index_hi_offset);
1078 readl(pcie_index_hi_offset);
1079 }
1080
1081 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1082
1083 return r;
1084}
1085
1086/**
1087 * amdgpu_device_indirect_wreg - write to an indirect register
1088 *
1089 * @adev: amdgpu_device pointer
1090 * @reg_addr: indirect register offset
1091 * @reg_data: indirect register data
1092 *
1093 */
1094void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1095 u32 reg_addr, u32 reg_data)
1096{
1097 unsigned long flags, pcie_index, pcie_data;
1098 void __iomem *pcie_index_offset;
1099 void __iomem *pcie_data_offset;
1100
1101 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1102 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1103
1104 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1105 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1106 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1107
1108 writel(reg_addr, pcie_index_offset);
1109 readl(pcie_index_offset);
1110 writel(reg_data, pcie_data_offset);
1111 readl(pcie_data_offset);
1112 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1113}
1114
1115void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1116 u64 reg_addr, u32 reg_data)
1117{
1118 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1119 void __iomem *pcie_index_offset;
1120 void __iomem *pcie_index_hi_offset;
1121 void __iomem *pcie_data_offset;
1122
1123 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1124 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1125 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1126 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1127 else
1128 pcie_index_hi = 0;
1129
1130 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1131 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1132 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1133 if (pcie_index_hi != 0)
1134 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1135 pcie_index_hi * 4;
1136
1137 writel(reg_addr, pcie_index_offset);
1138 readl(pcie_index_offset);
1139 if (pcie_index_hi != 0) {
1140 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1141 readl(pcie_index_hi_offset);
1142 }
1143 writel(reg_data, pcie_data_offset);
1144 readl(pcie_data_offset);
1145
1146 /* clear the high bits */
1147 if (pcie_index_hi != 0) {
1148 writel(0, pcie_index_hi_offset);
1149 readl(pcie_index_hi_offset);
1150 }
1151
1152 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1153}
1154
1155/**
1156 * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
1157 *
1158 * @adev: amdgpu_device pointer
1159 * @reg_addr: indirect register offset
1160 * @reg_data: indirect register data
1161 *
1162 */
1163void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1164 u32 reg_addr, u64 reg_data)
1165{
1166 unsigned long flags, pcie_index, pcie_data;
1167 void __iomem *pcie_index_offset;
1168 void __iomem *pcie_data_offset;
1169
1170 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1171 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1172
1173 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1174 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1175 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1176
1177 /* write low 32 bits */
1178 writel(reg_addr, pcie_index_offset);
1179 readl(pcie_index_offset);
1180 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1181 readl(pcie_data_offset);
1182 /* write high 32 bits */
1183 writel(reg_addr + 4, pcie_index_offset);
1184 readl(pcie_index_offset);
1185 writel((u32)(reg_data >> 32), pcie_data_offset);
1186 readl(pcie_data_offset);
1187 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1188}
1189
1190void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1191 u64 reg_addr, u64 reg_data)
1192{
1193 unsigned long flags, pcie_index, pcie_data;
1194 unsigned long pcie_index_hi = 0;
1195 void __iomem *pcie_index_offset;
1196 void __iomem *pcie_index_hi_offset;
1197 void __iomem *pcie_data_offset;
1198
1199 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1200 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1201 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1202 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1203
1204 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1205 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1206 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1207 if (pcie_index_hi != 0)
1208 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1209 pcie_index_hi * 4;
1210
1211 /* write low 32 bits */
1212 writel(reg_addr, pcie_index_offset);
1213 readl(pcie_index_offset);
1214 if (pcie_index_hi != 0) {
1215 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1216 readl(pcie_index_hi_offset);
1217 }
1218 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1219 readl(pcie_data_offset);
1220 /* write high 32 bits */
1221 writel(reg_addr + 4, pcie_index_offset);
1222 readl(pcie_index_offset);
1223 if (pcie_index_hi != 0) {
1224 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1225 readl(pcie_index_hi_offset);
1226 }
1227 writel((u32)(reg_data >> 32), pcie_data_offset);
1228 readl(pcie_data_offset);
1229
1230 /* clear the high bits */
1231 if (pcie_index_hi != 0) {
1232 writel(0, pcie_index_hi_offset);
1233 readl(pcie_index_hi_offset);
1234 }
1235
1236 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1237}
1238
1239/**
1240 * amdgpu_device_get_rev_id - query device rev_id
1241 *
1242 * @adev: amdgpu_device pointer
1243 *
1244 * Return device rev_id
1245 */
1246u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1247{
1248 return adev->nbio.funcs->get_rev_id(adev);
1249}
1250
1251/**
1252 * amdgpu_invalid_rreg - dummy reg read function
1253 *
1254 * @adev: amdgpu_device pointer
1255 * @reg: offset of register
1256 *
1257 * Dummy register read function. Used for register blocks
1258 * that certain asics don't have (all asics).
1259 * Returns the value in the register.
1260 */
1261static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1262{
1263 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1264 BUG();
1265 return 0;
1266}
1267
1268static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1269{
1270 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1271 BUG();
1272 return 0;
1273}
1274
1275/**
1276 * amdgpu_invalid_wreg - dummy reg write function
1277 *
1278 * @adev: amdgpu_device pointer
1279 * @reg: offset of register
1280 * @v: value to write to the register
1281 *
1282 * Dummy register write function. Used for register blocks
1283 * that certain asics don't have (all asics).
1284 */
1285static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1286{
1287 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1288 reg, v);
1289 BUG();
1290}
1291
1292static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1293{
1294 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1295 reg, v);
1296 BUG();
1297}
1298
1299/**
1300 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1301 *
1302 * @adev: amdgpu_device pointer
1303 * @reg: offset of register
1304 *
1305 * Dummy register read function. Used for register blocks
1306 * that certain asics don't have (all asics).
1307 * Returns the value in the register.
1308 */
1309static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1310{
1311 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1312 BUG();
1313 return 0;
1314}
1315
1316static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1317{
1318 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1319 BUG();
1320 return 0;
1321}
1322
1323/**
1324 * amdgpu_invalid_wreg64 - dummy reg write function
1325 *
1326 * @adev: amdgpu_device pointer
1327 * @reg: offset of register
1328 * @v: value to write to the register
1329 *
1330 * Dummy register write function. Used for register blocks
1331 * that certain asics don't have (all asics).
1332 */
1333static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1334{
1335 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1336 reg, v);
1337 BUG();
1338}
1339
1340static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1341{
1342 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1343 reg, v);
1344 BUG();
1345}
1346
1347/**
1348 * amdgpu_block_invalid_rreg - dummy reg read function
1349 *
1350 * @adev: amdgpu_device pointer
1351 * @block: offset of instance
1352 * @reg: offset of register
1353 *
1354 * Dummy register read function. Used for register blocks
1355 * that certain asics don't have (all asics).
1356 * Returns the value in the register.
1357 */
1358static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1359 uint32_t block, uint32_t reg)
1360{
1361 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1362 reg, block);
1363 BUG();
1364 return 0;
1365}
1366
1367/**
1368 * amdgpu_block_invalid_wreg - dummy reg write function
1369 *
1370 * @adev: amdgpu_device pointer
1371 * @block: offset of instance
1372 * @reg: offset of register
1373 * @v: value to write to the register
1374 *
1375 * Dummy register write function. Used for register blocks
1376 * that certain asics don't have (all asics).
1377 */
1378static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1379 uint32_t block,
1380 uint32_t reg, uint32_t v)
1381{
1382 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1383 reg, block, v);
1384 BUG();
1385}
1386
1387/**
1388 * amdgpu_device_asic_init - Wrapper for atom asic_init
1389 *
1390 * @adev: amdgpu_device pointer
1391 *
1392 * Does any asic specific work and then calls atom asic init.
1393 */
1394static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1395{
1396 int ret;
1397
1398 amdgpu_asic_pre_asic_init(adev);
1399
1400 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1401 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1402 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1403 amdgpu_psp_wait_for_bootloader(adev);
1404 ret = amdgpu_atomfirmware_asic_init(adev, true);
1405 return ret;
1406 } else {
1407 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1408 }
1409
1410 return 0;
1411}
1412
1413/**
1414 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1415 *
1416 * @adev: amdgpu_device pointer
1417 *
1418 * Allocates a scratch page of VRAM for use by various things in the
1419 * driver.
1420 */
1421static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1422{
1423 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1424 AMDGPU_GEM_DOMAIN_VRAM |
1425 AMDGPU_GEM_DOMAIN_GTT,
1426 &adev->mem_scratch.robj,
1427 &adev->mem_scratch.gpu_addr,
1428 (void **)&adev->mem_scratch.ptr);
1429}
1430
1431/**
1432 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1433 *
1434 * @adev: amdgpu_device pointer
1435 *
1436 * Frees the VRAM scratch page.
1437 */
1438static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1439{
1440 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1441}
1442
1443/**
1444 * amdgpu_device_program_register_sequence - program an array of registers.
1445 *
1446 * @adev: amdgpu_device pointer
1447 * @registers: pointer to the register array
1448 * @array_size: size of the register array
1449 *
1450 * Programs an array of registers with AND/OR masks.
1451 * This is a helper for setting golden registers.
1452 */
1453void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1454 const u32 *registers,
1455 const u32 array_size)
1456{
1457 u32 tmp, reg, and_mask, or_mask;
1458 int i;
1459
1460 if (array_size % 3)
1461 return;
1462
1463 for (i = 0; i < array_size; i += 3) {
1464 reg = registers[i + 0];
1465 and_mask = registers[i + 1];
1466 or_mask = registers[i + 2];
1467
1468 if (and_mask == 0xffffffff) {
1469 tmp = or_mask;
1470 } else {
1471 tmp = RREG32(reg);
1472 tmp &= ~and_mask;
1473 if (adev->family >= AMDGPU_FAMILY_AI)
1474 tmp |= (or_mask & and_mask);
1475 else
1476 tmp |= or_mask;
1477 }
1478 WREG32(reg, tmp);
1479 }
1480}
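/*
 * Usage sketch with a hypothetical golden register table; the offsets and
 * masks are illustrative only. Each entry is a {reg, and_mask, or_mask}
 * triple:
 *
 *   static const u32 example_golden_settings[] = {
 *           0x1234, 0xffffffff, 0x00000001,
 *           0x5678, 0x0000ff00, 0x00002100,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 *
 * The first entry overwrites the register completely (and_mask of all ones);
 * the second clears bits 8..15 and then ORs in the new field value.
 */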
1481
1482/**
1483 * amdgpu_device_pci_config_reset - reset the GPU
1484 *
1485 * @adev: amdgpu_device pointer
1486 *
1487 * Resets the GPU using the pci config reset sequence.
1488 * Only applicable to asics prior to vega10.
1489 */
1490void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1491{
1492 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1493}
1494
1495/**
1496 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1497 *
1498 * @adev: amdgpu_device pointer
1499 *
1500 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1501 */
1502int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1503{
1504 return pci_reset_function(adev->pdev);
1505}
1506
1507/*
1508 * amdgpu_device_wb_*()
1509 * Writeback is the method by which the GPU updates special pages in memory
1510 * with the status of certain GPU events (fences, ring pointers, etc.).
1511 */
1512
1513/**
1514 * amdgpu_device_wb_fini - Disable Writeback and free memory
1515 *
1516 * @adev: amdgpu_device pointer
1517 *
1518 * Disables Writeback and frees the Writeback memory (all asics).
1519 * Used at driver shutdown.
1520 */
1521static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1522{
1523 if (adev->wb.wb_obj) {
1524 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1525 &adev->wb.gpu_addr,
1526 (void **)&adev->wb.wb);
1527 adev->wb.wb_obj = NULL;
1528 }
1529}
1530
1531/**
1532 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1533 *
1534 * @adev: amdgpu_device pointer
1535 *
1536 * Initializes writeback and allocates writeback memory (all asics).
1537 * Used at driver startup.
1538 * Returns 0 on success or a negative error code on failure.
1539 */
1540static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1541{
1542 int r;
1543
1544 if (adev->wb.wb_obj == NULL) {
1545 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1546 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1547 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1548 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1549 (void **)&adev->wb.wb);
1550 if (r) {
1551 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1552 return r;
1553 }
1554
1555 adev->wb.num_wb = AMDGPU_MAX_WB;
1556 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1557
1558 /* clear wb memory */
1559 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1560 }
1561
1562 return 0;
1563}
1564
1565/**
1566 * amdgpu_device_wb_get - Allocate a wb entry
1567 *
1568 * @adev: amdgpu_device pointer
1569 * @wb: wb index
1570 *
1571 * Allocate a wb slot for use by the driver (all asics).
1572 * Returns 0 on success or -EINVAL on failure.
1573 */
1574int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1575{
1576 unsigned long flags, offset;
1577
1578 spin_lock_irqsave(&adev->wb.lock, flags);
1579 offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1580 if (offset < adev->wb.num_wb) {
1581 __set_bit(offset, adev->wb.used);
1582 spin_unlock_irqrestore(&adev->wb.lock, flags);
1583 *wb = offset << 3; /* convert to dw offset */
1584 return 0;
1585 } else {
1586 spin_unlock_irqrestore(&adev->wb.lock, flags);
1587 return -EINVAL;
1588 }
1589}
1590
1591/**
1592 * amdgpu_device_wb_free - Free a wb entry
1593 *
1594 * @adev: amdgpu_device pointer
1595 * @wb: wb index
1596 *
1597 * Free a wb slot allocated for use by the driver (all asics)
1598 */
1599void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1600{
1601 unsigned long flags;
1602
1603 wb >>= 3;
1604 spin_lock_irqsave(&adev->wb.lock, flags);
1605 if (wb < adev->wb.num_wb)
1606 __clear_bit(wb, adev->wb.used);
1607 spin_unlock_irqrestore(&adev->wb.lock, flags);
1608}
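/*
 * Usage sketch: allocate a writeback slot, derive its CPU and GPU views and
 * release it again. use_slot() is a placeholder for whatever consumes the
 * slot, e.g. programming a ring to write status values to gpu_addr and
 * polling them through cpu_addr:
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           volatile u32 *cpu_addr = &adev->wb.wb[wb];
 *           u64 gpu_addr = adev->wb.gpu_addr + wb * 4;
 *
 *           use_slot(cpu_addr, gpu_addr);
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */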
1609
1610/**
1611 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1612 *
1613 * @adev: amdgpu_device pointer
1614 *
1615 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1616 * to fail, but if any of the BARs is not accessible after the resize we abort
1617 * driver loading by returning -ENODEV.
1618 */
1619int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1620{
1621 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1622 struct pci_bus *root;
1623 struct resource *res;
1624 unsigned int i;
1625 u16 cmd;
1626 int r;
1627
1628 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1629 return 0;
1630
1631 /* Bypass for VF */
1632 if (amdgpu_sriov_vf(adev))
1633 return 0;
1634
1635 /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1636 if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1637 DRM_WARN("System can't access extended configuration space, please check!!\n");
1638
1639 /* skip if the bios has already enabled large BAR */
1640 if (adev->gmc.real_vram_size &&
1641 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1642 return 0;
1643
1644 /* Check if the root BUS has 64bit memory resources */
1645 root = adev->pdev->bus;
1646 while (root->parent)
1647 root = root->parent;
1648
1649 pci_bus_for_each_resource(root, res, i) {
1650 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1651 res->start > 0x100000000ull)
1652 break;
1653 }
1654
1655 /* Trying to resize is pointless without a root hub window above 4GB */
1656 if (!res)
1657 return 0;
1658
1659 /* Limit the BAR size to what is available */
1660 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1661 rbar_size);
1662
1663 /* Disable memory decoding while we change the BAR addresses and size */
1664 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1665 pci_write_config_word(adev->pdev, PCI_COMMAND,
1666 cmd & ~PCI_COMMAND_MEMORY);
1667
1668 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1669 amdgpu_doorbell_fini(adev);
1670 if (adev->asic_type >= CHIP_BONAIRE)
1671 pci_release_resource(adev->pdev, 2);
1672
1673 pci_release_resource(adev->pdev, 0);
1674
1675 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1676 if (r == -ENOSPC)
1677 DRM_INFO("Not enough PCI address space for a large BAR.");
1678 else if (r && r != -ENOTSUPP)
1679 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1680
1681 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1682
1683 /* When the doorbell or fb BAR isn't available we have no chance of
1684 * using the device.
1685 */
1686 r = amdgpu_doorbell_init(adev);
1687 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1688 return -ENODEV;
1689
1690 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1691
1692 return 0;
1693}
1694
1695static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1696{
1697 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1698 return false;
1699
1700 return true;
1701}
1702
1703/*
1704 * GPU helpers function.
1705 */
1706/**
1707 * amdgpu_device_need_post - check if the hw needs to be posted or not
1708 *
1709 * @adev: amdgpu_device pointer
1710 *
1711 * Check if the asic has been initialized (all asics) at driver startup,
1712 * or if a post is needed because a hw reset was performed.
1713 * Returns true if post is needed or false if not.
1714 */
1715bool amdgpu_device_need_post(struct amdgpu_device *adev)
1716{
1717 uint32_t reg;
1718
1719 if (amdgpu_sriov_vf(adev))
1720 return false;
1721
1722 if (!amdgpu_device_read_bios(adev))
1723 return false;
1724
1725 if (amdgpu_passthrough(adev)) {
1726 /* for FIJI: in the whole-GPU pass-through virtualization case, some old SMC
1727 * firmware still needs the driver to do a vPost after a VM reboot, otherwise
1728 * the GPU hangs. SMC firmware versions 22.15 and above do not have this flaw,
1729 * so force vPost only for SMC versions below 22.15.
1730 */
1731 if (adev->asic_type == CHIP_FIJI) {
1732 int err;
1733 uint32_t fw_ver;
1734
1735 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1736 /* force vPost if an error occurred */
1737 if (err)
1738 return true;
1739
1740 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1741 release_firmware(adev->pm.fw);
1742 if (fw_ver < 0x00160e00)
1743 return true;
1744 }
1745 }
1746
1747 /* Don't post if we need to reset whole hive on init */
1748 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1749 return false;
1750
1751 if (adev->has_hw_reset) {
1752 adev->has_hw_reset = false;
1753 return true;
1754 }
1755
1756 /* bios scratch used on CIK+ */
1757 if (adev->asic_type >= CHIP_BONAIRE)
1758 return amdgpu_atombios_scratch_need_asic_init(adev);
1759
1760 /* check MEM_SIZE for older asics */
1761 reg = amdgpu_asic_get_config_memsize(adev);
1762
1763 if ((reg != 0) && (reg != 0xffffffff))
1764 return false;
1765
1766 return true;
1767}
1768
1769/*
1770 * Check whether seamless boot is supported.
1771 *
1772 * So far we only support seamless boot on DCE 3.0 or later.
1773 * If users report that it works on older ASICs as well, we may
1774 * loosen this.
1775 */
1776bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1777{
1778 switch (amdgpu_seamless) {
1779 case -1:
1780 break;
1781 case 1:
1782 return true;
1783 case 0:
1784 return false;
1785 default:
1786 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1787 amdgpu_seamless);
1788 return false;
1789 }
1790
1791 if (!(adev->flags & AMD_IS_APU))
1792 return false;
1793
1794 if (adev->mman.keep_stolen_vga_memory)
1795 return false;
1796
1797 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1798}
1799
1800/*
1801 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1802 * don't support dynamic speed switching. Until we have confirmation from Intel
1803 * that a specific host supports it, it's safer that we keep it disabled for all.
1804 *
1805 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1806 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1807 */
1808static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1809{
1810#if IS_ENABLED(CONFIG_X86)
1811 struct cpuinfo_x86 *c = &cpu_data(0);
1812
1813 /* eGPUs change speeds based on USB4 fabric conditions */
1814 if (dev_is_removable(adev->dev))
1815 return true;
1816
1817 if (c->x86_vendor == X86_VENDOR_INTEL)
1818 return false;
1819#endif
1820 return true;
1821}
1822
1823/**
1824 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1825 *
1826 * @adev: amdgpu_device pointer
1827 *
1828 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1829 * be set for this device.
1830 *
1831 * Returns true if it should be used or false if not.
1832 */
1833bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1834{
1835 switch (amdgpu_aspm) {
1836 case -1:
1837 break;
1838 case 0:
1839 return false;
1840 case 1:
1841 return true;
1842 default:
1843 return false;
1844 }
1845 if (adev->flags & AMD_IS_APU)
1846 return false;
1847 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1848 return false;
1849 return pcie_aspm_enabled(adev->pdev);
1850}
1851
1852/* if we get transitioned to only one device, take VGA back */
1853/**
1854 * amdgpu_device_vga_set_decode - enable/disable vga decode
1855 *
1856 * @pdev: PCI device pointer
1857 * @state: enable/disable vga decode
1858 *
1859 * Enable/disable vga decode (all asics).
1860 * Returns VGA resource flags.
1861 */
1862static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1863 bool state)
1864{
1865 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1866
1867 amdgpu_asic_set_vga_state(adev, state);
1868 if (state)
1869 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1870 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1871 else
1872 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1873}
1874
1875/**
1876 * amdgpu_device_check_block_size - validate the vm block size
1877 *
1878 * @adev: amdgpu_device pointer
1879 *
1880 * Validates the vm block size specified via module parameter.
1881 * The vm block size defines number of bits in page table versus page directory,
1882 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1883 * page table and the remaining bits are in the page directory.
1884 */
1885static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1886{
1887 /* defines number of bits in page table versus page directory,
1888 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1889 * page table and the remaining bits are in the page directory
1890 */
1891 if (amdgpu_vm_block_size == -1)
1892 return;
1893
1894 if (amdgpu_vm_block_size < 9) {
1895 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1896 amdgpu_vm_block_size);
1897 amdgpu_vm_block_size = -1;
1898 }
1899}
1900
1901/**
1902 * amdgpu_device_check_vm_size - validate the vm size
1903 *
1904 * @adev: amdgpu_device pointer
1905 *
1906 * Validates the vm size in GB specified via module parameter.
1907 * The VM size is the size of the GPU virtual memory space in GB.
1908 */
1909static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1910{
1911 /* no need to check the default value */
1912 if (amdgpu_vm_size == -1)
1913 return;
1914
1915 if (amdgpu_vm_size < 1) {
1916 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1917 amdgpu_vm_size);
1918 amdgpu_vm_size = -1;
1919 }
1920}
1921
1922static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1923{
1924 struct sysinfo si;
1925 bool is_os_64 = (sizeof(void *) == 8);
1926 uint64_t total_memory;
1927 uint64_t dram_size_seven_GB = 0x1B8000000;
1928 uint64_t dram_size_three_GB = 0xB8000000;
1929
1930 if (amdgpu_smu_memory_pool_size == 0)
1931 return;
1932
1933 if (!is_os_64) {
1934 DRM_WARN("Not 64-bit OS, feature not supported\n");
1935 goto def_value;
1936 }
1937 si_meminfo(&si);
1938 total_memory = (uint64_t)si.totalram * si.mem_unit;
1939
1940 if ((amdgpu_smu_memory_pool_size == 1) ||
1941 (amdgpu_smu_memory_pool_size == 2)) {
1942 if (total_memory < dram_size_three_GB)
1943 goto def_value1;
1944 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1945 (amdgpu_smu_memory_pool_size == 8)) {
1946 if (total_memory < dram_size_seven_GB)
1947 goto def_value1;
1948 } else {
1949 DRM_WARN("Smu memory pool size not supported\n");
1950 goto def_value;
1951 }
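	/*
	 * amdgpu_smu_memory_pool_size is given in units of 256MB
	 * (1/2/4/8 => 256MB/512MB/1GB/2GB), hence the shift by 28 bits
	 * to convert it to bytes.
	 */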
1952 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1953
1954 return;
1955
1956def_value1:
1957 DRM_WARN("No enough system memory\n");
1958def_value:
1959 adev->pm.smu_prv_buffer_size = 0;
1960}
1961
1962static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1963{
1964 if (!(adev->flags & AMD_IS_APU) ||
1965 adev->asic_type < CHIP_RAVEN)
1966 return 0;
1967
1968 switch (adev->asic_type) {
1969 case CHIP_RAVEN:
1970 if (adev->pdev->device == 0x15dd)
1971 adev->apu_flags |= AMD_APU_IS_RAVEN;
1972 if (adev->pdev->device == 0x15d8)
1973 adev->apu_flags |= AMD_APU_IS_PICASSO;
1974 break;
1975 case CHIP_RENOIR:
1976 if ((adev->pdev->device == 0x1636) ||
1977 (adev->pdev->device == 0x164c))
1978 adev->apu_flags |= AMD_APU_IS_RENOIR;
1979 else
1980 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1981 break;
1982 case CHIP_VANGOGH:
1983 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1984 break;
1985 case CHIP_YELLOW_CARP:
1986 break;
1987 case CHIP_CYAN_SKILLFISH:
1988 if ((adev->pdev->device == 0x13FE) ||
1989 (adev->pdev->device == 0x143F))
1990 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1991 break;
1992 default:
1993 break;
1994 }
1995
1996 return 0;
1997}
1998
1999/**
2000 * amdgpu_device_check_arguments - validate module params
2001 *
2002 * @adev: amdgpu_device pointer
2003 *
2004 * Validates certain module parameters and updates
2005 * the associated values used by the driver (all asics).
2006 */
2007static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2008{
2009 int i;
2010
2011 if (amdgpu_sched_jobs < 4) {
2012 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2013 amdgpu_sched_jobs);
2014 amdgpu_sched_jobs = 4;
2015 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
2016 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2017 amdgpu_sched_jobs);
2018 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2019 }
2020
2021 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
2022 /* gart size must be greater or equal to 32M */
2023 dev_warn(adev->dev, "gart size (%d) too small\n",
2024 amdgpu_gart_size);
2025 amdgpu_gart_size = -1;
2026 }
2027
2028 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
2029 /* gtt size must be greater or equal to 32M */
2030 dev_warn(adev->dev, "gtt size (%d) too small\n",
2031 amdgpu_gtt_size);
2032 amdgpu_gtt_size = -1;
2033 }
2034
2035 /* valid range is between 4 and 9 inclusive */
2036 if (amdgpu_vm_fragment_size != -1 &&
2037 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
2038 dev_warn(adev->dev, "valid range is between 4 and 9\n");
2039 amdgpu_vm_fragment_size = -1;
2040 }
2041
2042 if (amdgpu_sched_hw_submission < 2) {
2043 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2044 amdgpu_sched_hw_submission);
2045 amdgpu_sched_hw_submission = 2;
2046 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2047 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2048 amdgpu_sched_hw_submission);
2049 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2050 }
2051
2052 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2053 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2054 amdgpu_reset_method = -1;
2055 }
2056
2057 amdgpu_device_check_smu_prv_buffer_size(adev);
2058
2059 amdgpu_device_check_vm_size(adev);
2060
2061 amdgpu_device_check_block_size(adev);
2062
2063 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2064
2065 for (i = 0; i < MAX_XCP; i++)
2066 adev->enforce_isolation[i] = !!enforce_isolation;
2067
2068 return 0;
2069}
2070
2071/**
2072 * amdgpu_switcheroo_set_state - set switcheroo state
2073 *
2074 * @pdev: pci dev pointer
2075 * @state: vga_switcheroo state
2076 *
2077 * Callback for the switcheroo driver. Suspends or resumes
2078 * the asics before or after it is powered up using ACPI methods.
2079 */
2080static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2081 enum vga_switcheroo_state state)
2082{
2083 struct drm_device *dev = pci_get_drvdata(pdev);
2084 int r;
2085
2086 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
2087 return;
2088
2089 if (state == VGA_SWITCHEROO_ON) {
2090 pr_info("switched on\n");
2091 /* don't suspend or resume card normally */
2092 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2093
2094 pci_set_power_state(pdev, PCI_D0);
2095 amdgpu_device_load_pci_state(pdev);
2096 r = pci_enable_device(pdev);
2097 if (r)
2098 DRM_WARN("pci_enable_device failed (%d)\n", r);
2099 amdgpu_device_resume(dev, true);
2100
2101 dev->switch_power_state = DRM_SWITCH_POWER_ON;
2102 } else {
2103 pr_info("switched off\n");
2104 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2105 amdgpu_device_prepare(dev);
2106 amdgpu_device_suspend(dev, true);
2107 amdgpu_device_cache_pci_state(pdev);
2108 /* Shut down the device */
2109 pci_disable_device(pdev);
2110 pci_set_power_state(pdev, PCI_D3cold);
2111 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2112 }
2113}
2114
2115/**
2116 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2117 *
2118 * @pdev: pci dev pointer
2119 *
2120 * Callback for the switcheroo driver. Checks whether the switcheroo
2121 * state can be changed.
2122 * Returns true if the state can be changed, false if not.
2123 */
2124static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2125{
2126 struct drm_device *dev = pci_get_drvdata(pdev);
2127
2128 /*
2129 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2130 * locking inversion with the driver load path. And the access here is
2131 * completely racy anyway. So don't bother with locking for now.
2132 */
2133 return atomic_read(&dev->open_count) == 0;
2134}
2135
2136static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2137 .set_gpu_state = amdgpu_switcheroo_set_state,
2138 .reprobe = NULL,
2139 .can_switch = amdgpu_switcheroo_can_switch,
2140};
2141
2142/**
2143 * amdgpu_device_ip_set_clockgating_state - set the CG state
2144 *
2145 * @dev: amdgpu_device pointer
2146 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2147 * @state: clockgating state (gate or ungate)
2148 *
2149 * Sets the requested clockgating state for all instances of
2150 * the hardware IP specified.
2151 * Returns the error code from the last instance.
2152 */
2153int amdgpu_device_ip_set_clockgating_state(void *dev,
2154 enum amd_ip_block_type block_type,
2155 enum amd_clockgating_state state)
2156{
2157 struct amdgpu_device *adev = dev;
2158 int i, r = 0;
2159
2160 for (i = 0; i < adev->num_ip_blocks; i++) {
2161 if (!adev->ip_blocks[i].status.valid)
2162 continue;
2163 if (adev->ip_blocks[i].version->type != block_type)
2164 continue;
2165 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2166 continue;
2167 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2168 (void *)adev, state);
2169 if (r)
2170 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2171 adev->ip_blocks[i].version->funcs->name, r);
2172 }
2173 return r;
2174}
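
/*
 * Illustrative use only (hypothetical caller, not a call site in this file):
 * gating VCN clocks on every instance would look roughly like
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 *					       AMD_CG_STATE_GATE);
 */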
2175
2176/**
2177 * amdgpu_device_ip_set_powergating_state - set the PG state
2178 *
2179 * @dev: amdgpu_device pointer
2180 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2181 * @state: powergating state (gate or ungate)
2182 *
2183 * Sets the requested powergating state for all instances of
2184 * the hardware IP specified.
2185 * Returns the error code from the last instance.
2186 */
2187int amdgpu_device_ip_set_powergating_state(void *dev,
2188 enum amd_ip_block_type block_type,
2189 enum amd_powergating_state state)
2190{
2191 struct amdgpu_device *adev = dev;
2192 int i, r = 0;
2193
2194 for (i = 0; i < adev->num_ip_blocks; i++) {
2195 if (!adev->ip_blocks[i].status.valid)
2196 continue;
2197 if (adev->ip_blocks[i].version->type != block_type)
2198 continue;
2199 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2200 continue;
2201 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2202 (void *)adev, state);
2203 if (r)
2204 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2205 adev->ip_blocks[i].version->funcs->name, r);
2206 }
2207 return r;
2208}
2209
2210/**
2211 * amdgpu_device_ip_get_clockgating_state - get the CG state
2212 *
2213 * @adev: amdgpu_device pointer
2214 * @flags: clockgating feature flags
2215 *
2216 * Walks the list of IPs on the device and updates the clockgating
2217 * flags for each IP.
2218 * Updates @flags with the feature flags for each hardware IP where
2219 * clockgating is enabled.
2220 */
2221void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2222 u64 *flags)
2223{
2224 int i;
2225
2226 for (i = 0; i < adev->num_ip_blocks; i++) {
2227 if (!adev->ip_blocks[i].status.valid)
2228 continue;
2229 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2230 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2231 }
2232}
2233
2234/**
2235 * amdgpu_device_ip_wait_for_idle - wait for idle
2236 *
2237 * @adev: amdgpu_device pointer
2238 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2239 *
2240 * Waits for the requested hardware IP to be idle.
2241 * Returns 0 for success or a negative error code on failure.
2242 */
2243int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2244 enum amd_ip_block_type block_type)
2245{
2246 int i, r;
2247
2248 for (i = 0; i < adev->num_ip_blocks; i++) {
2249 if (!adev->ip_blocks[i].status.valid)
2250 continue;
2251 if (adev->ip_blocks[i].version->type == block_type) {
2252 if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2253 r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2254 &adev->ip_blocks[i]);
2255 if (r)
2256 return r;
2257 }
2258 break;
2259 }
2260 }
2261 return 0;
2262
2263}
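
/*
 * Illustrative use (hypothetical caller): draining outstanding work in the
 * GFX block before reprogramming it could be written as
 *
 *	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 *	if (r)
 *		return r;
 */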
2264
2265/**
2266 * amdgpu_device_ip_is_valid - is the hardware IP enabled
2267 *
2268 * @adev: amdgpu_device pointer
2269 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2270 *
2271 * Checks if the hardware IP is enabled or not.
2272 * Returns true if the IP is enabled, false if not.
2273 */
2274bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2275 enum amd_ip_block_type block_type)
2276{
2277 int i;
2278
2279 for (i = 0; i < adev->num_ip_blocks; i++) {
2280 if (adev->ip_blocks[i].version->type == block_type)
2281 return adev->ip_blocks[i].status.valid;
2282 }
2283 return false;
2284
2285}
2286
2287/**
2288 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2289 *
2290 * @adev: amdgpu_device pointer
2291 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2292 *
2293 * Returns a pointer to the hardware IP block structure
2294 * if it exists for the asic, otherwise NULL.
2295 */
2296struct amdgpu_ip_block *
2297amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2298 enum amd_ip_block_type type)
2299{
2300 int i;
2301
2302 for (i = 0; i < adev->num_ip_blocks; i++)
2303 if (adev->ip_blocks[i].version->type == type)
2304 return &adev->ip_blocks[i];
2305
2306 return NULL;
2307}
2308
2309/**
2310 * amdgpu_device_ip_block_version_cmp
2311 *
2312 * @adev: amdgpu_device pointer
2313 * @type: enum amd_ip_block_type
2314 * @major: major version
2315 * @minor: minor version
2316 *
2317 * Returns 0 if the IP block version is equal to or greater than the one specified,
2318 * or 1 if it is smaller or the ip_block doesn't exist.
2319 */
2320int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2321 enum amd_ip_block_type type,
2322 u32 major, u32 minor)
2323{
2324 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2325
2326 if (ip_block && ((ip_block->version->major > major) ||
2327 ((ip_block->version->major == major) &&
2328 (ip_block->version->minor >= minor))))
2329 return 0;
2330
2331 return 1;
2332}
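
/*
 * Illustrative use (hypothetical caller): requiring at least VCE 3.0 could be
 * written as
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_VCE, 3, 0))
 *		... the VCE block is version 3.0 or newer ...
 */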
2333
2334/**
2335 * amdgpu_device_ip_block_add
2336 *
2337 * @adev: amdgpu_device pointer
2338 * @ip_block_version: pointer to the IP to add
2339 *
2340 * Adds the IP block driver information to the collection of IPs
2341 * on the asic.
2342 */
2343int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2344 const struct amdgpu_ip_block_version *ip_block_version)
2345{
2346 if (!ip_block_version)
2347 return -EINVAL;
2348
2349 switch (ip_block_version->type) {
2350 case AMD_IP_BLOCK_TYPE_VCN:
2351 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2352 return 0;
2353 break;
2354 case AMD_IP_BLOCK_TYPE_JPEG:
2355 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2356 return 0;
2357 break;
2358 default:
2359 break;
2360 }
2361
2362 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2363 ip_block_version->funcs->name);
2364
2365 adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2366
2367 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2368
2369 return 0;
2370}
2371
2372/**
2373 * amdgpu_device_enable_virtual_display - enable virtual display feature
2374 *
2375 * @adev: amdgpu_device pointer
2376 *
2377 * Enables the virtual display feature if the user has enabled it via
2378 * the module parameter virtual_display. This feature provides a virtual
2379 * display hardware on headless boards or in virtualized environments.
2380 * This function parses and validates the configuration string specified by
2381 * the user and configures the virtual display configuration (number of
2382 * virtual connectors, crtcs, etc.) specified.
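 *
 * The configuration string is a ';'-separated list of entries of the form
 * "<pci address>[,<number of crtcs>]"; the literal "all" matches any device
 * and the crtc count is clamped to the range 1-6, e.g. "0000:03:00.0,2;all"
 * (the pci address here is only an example).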
2383 */
2384static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2385{
2386 adev->enable_virtual_display = false;
2387
2388 if (amdgpu_virtual_display) {
2389 const char *pci_address_name = pci_name(adev->pdev);
2390 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2391
2392 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2393 pciaddstr_tmp = pciaddstr;
2394 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2395 pciaddname = strsep(&pciaddname_tmp, ",");
2396 if (!strcmp("all", pciaddname)
2397 || !strcmp(pci_address_name, pciaddname)) {
2398 long num_crtc;
2399 int res = -1;
2400
2401 adev->enable_virtual_display = true;
2402
2403 if (pciaddname_tmp)
2404 res = kstrtol(pciaddname_tmp, 10,
2405 &num_crtc);
2406
2407 if (!res) {
2408 if (num_crtc < 1)
2409 num_crtc = 1;
2410 if (num_crtc > 6)
2411 num_crtc = 6;
2412 adev->mode_info.num_crtc = num_crtc;
2413 } else {
2414 adev->mode_info.num_crtc = 1;
2415 }
2416 break;
2417 }
2418 }
2419
2420 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2421 amdgpu_virtual_display, pci_address_name,
2422 adev->enable_virtual_display, adev->mode_info.num_crtc);
2423
2424 kfree(pciaddstr);
2425 }
2426}
2427
2428void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2429{
2430 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2431 adev->mode_info.num_crtc = 1;
2432 adev->enable_virtual_display = true;
2433 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2434 adev->enable_virtual_display, adev->mode_info.num_crtc);
2435 }
2436}
2437
2438/**
2439 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2440 *
2441 * @adev: amdgpu_device pointer
2442 *
2443 * Parses the asic configuration parameters specified in the gpu info
2444 * firmware and makes them available to the driver for use in configuring
2445 * the asic.
2446 * Returns 0 on success, -EINVAL on failure.
2447 */
2448static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2449{
2450 const char *chip_name;
2451 int err;
2452 const struct gpu_info_firmware_header_v1_0 *hdr;
2453
2454 adev->firmware.gpu_info_fw = NULL;
2455
2456 if (adev->mman.discovery_bin)
2457 return 0;
2458
2459 switch (adev->asic_type) {
2460 default:
2461 return 0;
2462 case CHIP_VEGA10:
2463 chip_name = "vega10";
2464 break;
2465 case CHIP_VEGA12:
2466 chip_name = "vega12";
2467 break;
2468 case CHIP_RAVEN:
2469 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2470 chip_name = "raven2";
2471 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2472 chip_name = "picasso";
2473 else
2474 chip_name = "raven";
2475 break;
2476 case CHIP_ARCTURUS:
2477 chip_name = "arcturus";
2478 break;
2479 case CHIP_NAVI12:
2480 chip_name = "navi12";
2481 break;
2482 }
2483
2484 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
2485 "amdgpu/%s_gpu_info.bin", chip_name);
2486 if (err) {
2487 dev_err(adev->dev,
2488 "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2489 chip_name);
2490 goto out;
2491 }
2492
2493 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2494 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2495
2496 switch (hdr->version_major) {
2497 case 1:
2498 {
2499 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2500 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2501 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2502
2503 /*
2504 * Should be dropped when DAL no longer needs it.
2505 */
2506 if (adev->asic_type == CHIP_NAVI12)
2507 goto parse_soc_bounding_box;
2508
2509 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2510 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2511 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2512 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2513 adev->gfx.config.max_texture_channel_caches =
2514 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2515 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2516 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2517 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2518 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2519 adev->gfx.config.double_offchip_lds_buf =
2520 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2521 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2522 adev->gfx.cu_info.max_waves_per_simd =
2523 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2524 adev->gfx.cu_info.max_scratch_slots_per_cu =
2525 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2526 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2527 if (hdr->version_minor >= 1) {
2528 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2529 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2530 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2531 adev->gfx.config.num_sc_per_sh =
2532 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2533 adev->gfx.config.num_packer_per_sc =
2534 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2535 }
2536
2537parse_soc_bounding_box:
2538 /*
2539 * soc bounding box info is not integrated in the discovery table;
2540 * we always need to parse it from the gpu info firmware if needed.
2541 */
2542 if (hdr->version_minor == 2) {
2543 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2544 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2545 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2546 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2547 }
2548 break;
2549 }
2550 default:
2551 dev_err(adev->dev,
2552 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2553 err = -EINVAL;
2554 goto out;
2555 }
2556out:
2557 return err;
2558}
2559
2560/**
2561 * amdgpu_device_ip_early_init - run early init for hardware IPs
2562 *
2563 * @adev: amdgpu_device pointer
2564 *
2565 * Early initialization pass for hardware IPs. The hardware IPs that make
2566 * up each asic are discovered and each IP's early_init callback is run. This
2567 * is the first stage in initializing the asic.
2568 * Returns 0 on success, negative error code on failure.
2569 */
2570static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2571{
2572 struct amdgpu_ip_block *ip_block;
2573 struct pci_dev *parent;
2574 int i, r;
2575 bool total;
2576
2577 amdgpu_device_enable_virtual_display(adev);
2578
2579 if (amdgpu_sriov_vf(adev)) {
2580 r = amdgpu_virt_request_full_gpu(adev, true);
2581 if (r)
2582 return r;
2583 }
2584
2585 switch (adev->asic_type) {
2586#ifdef CONFIG_DRM_AMDGPU_SI
2587 case CHIP_VERDE:
2588 case CHIP_TAHITI:
2589 case CHIP_PITCAIRN:
2590 case CHIP_OLAND:
2591 case CHIP_HAINAN:
2592 adev->family = AMDGPU_FAMILY_SI;
2593 r = si_set_ip_blocks(adev);
2594 if (r)
2595 return r;
2596 break;
2597#endif
2598#ifdef CONFIG_DRM_AMDGPU_CIK
2599 case CHIP_BONAIRE:
2600 case CHIP_HAWAII:
2601 case CHIP_KAVERI:
2602 case CHIP_KABINI:
2603 case CHIP_MULLINS:
2604 if (adev->flags & AMD_IS_APU)
2605 adev->family = AMDGPU_FAMILY_KV;
2606 else
2607 adev->family = AMDGPU_FAMILY_CI;
2608
2609 r = cik_set_ip_blocks(adev);
2610 if (r)
2611 return r;
2612 break;
2613#endif
2614 case CHIP_TOPAZ:
2615 case CHIP_TONGA:
2616 case CHIP_FIJI:
2617 case CHIP_POLARIS10:
2618 case CHIP_POLARIS11:
2619 case CHIP_POLARIS12:
2620 case CHIP_VEGAM:
2621 case CHIP_CARRIZO:
2622 case CHIP_STONEY:
2623 if (adev->flags & AMD_IS_APU)
2624 adev->family = AMDGPU_FAMILY_CZ;
2625 else
2626 adev->family = AMDGPU_FAMILY_VI;
2627
2628 r = vi_set_ip_blocks(adev);
2629 if (r)
2630 return r;
2631 break;
2632 default:
2633 r = amdgpu_discovery_set_ip_blocks(adev);
2634 if (r)
2635 return r;
2636 break;
2637 }
2638
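	/*
	 * An ATPX-controlled dGPU on a non-removable bus is treated as a
	 * PX (PowerXpress/hybrid graphics) platform.
	 */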
2639 if (amdgpu_has_atpx() &&
2640 (amdgpu_is_atpx_hybrid() ||
2641 amdgpu_has_atpx_dgpu_power_cntl()) &&
2642 ((adev->flags & AMD_IS_APU) == 0) &&
2643 !dev_is_removable(&adev->pdev->dev))
2644 adev->flags |= AMD_IS_PX;
2645
2646 if (!(adev->flags & AMD_IS_APU)) {
2647 parent = pcie_find_root_port(adev->pdev);
2648 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2649 }
2650
2651
2652 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2653 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2654 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2655 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2656 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2657 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2658 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2659
2660 total = true;
2661 for (i = 0; i < adev->num_ip_blocks; i++) {
2662 ip_block = &adev->ip_blocks[i];
2663
2664 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2665 DRM_WARN("disabled ip block: %d <%s>\n",
2666 i, adev->ip_blocks[i].version->funcs->name);
2667 adev->ip_blocks[i].status.valid = false;
2668 } else if (ip_block->version->funcs->early_init) {
2669 r = ip_block->version->funcs->early_init(ip_block);
2670 if (r == -ENOENT) {
2671 adev->ip_blocks[i].status.valid = false;
2672 } else if (r) {
2673 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2674 adev->ip_blocks[i].version->funcs->name, r);
2675 total = false;
2676 } else {
2677 adev->ip_blocks[i].status.valid = true;
2678 }
2679 } else {
2680 adev->ip_blocks[i].status.valid = true;
2681 }
2682 /* get the vbios after the asic_funcs are set up */
2683 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2684 r = amdgpu_device_parse_gpu_info_fw(adev);
2685 if (r)
2686 return r;
2687
2688 /* Read BIOS */
2689 if (amdgpu_device_read_bios(adev)) {
2690 if (!amdgpu_get_bios(adev))
2691 return -EINVAL;
2692
2693 r = amdgpu_atombios_init(adev);
2694 if (r) {
2695 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2696 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2697 return r;
2698 }
2699 }
2700
2701 /* get pf2vf msg info at its earliest time */
2702 if (amdgpu_sriov_vf(adev))
2703 amdgpu_virt_init_data_exchange(adev);
2704
2705 }
2706 }
2707 if (!total)
2708 return -ENODEV;
2709
2710 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2711 if (ip_block && ip_block->status.valid)
2712 amdgpu_amdkfd_device_probe(adev);
2713
2714 adev->cg_flags &= amdgpu_cg_mask;
2715 adev->pg_flags &= amdgpu_pg_mask;
2716
2717 return 0;
2718}
2719
2720static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2721{
2722 int i, r;
2723
2724 for (i = 0; i < adev->num_ip_blocks; i++) {
2725 if (!adev->ip_blocks[i].status.sw)
2726 continue;
2727 if (adev->ip_blocks[i].status.hw)
2728 continue;
2729 if (!amdgpu_ip_member_of_hwini(
2730 adev, adev->ip_blocks[i].version->type))
2731 continue;
2732 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2733 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2734 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2735 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2736 if (r) {
2737 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2738 adev->ip_blocks[i].version->funcs->name, r);
2739 return r;
2740 }
2741 adev->ip_blocks[i].status.hw = true;
2742 }
2743 }
2744
2745 return 0;
2746}
2747
2748static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2749{
2750 int i, r;
2751
2752 for (i = 0; i < adev->num_ip_blocks; i++) {
2753 if (!adev->ip_blocks[i].status.sw)
2754 continue;
2755 if (adev->ip_blocks[i].status.hw)
2756 continue;
2757 if (!amdgpu_ip_member_of_hwini(
2758 adev, adev->ip_blocks[i].version->type))
2759 continue;
2760 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2761 if (r) {
2762 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2763 adev->ip_blocks[i].version->funcs->name, r);
2764 return r;
2765 }
2766 adev->ip_blocks[i].status.hw = true;
2767 }
2768
2769 return 0;
2770}
2771
2772static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2773{
2774 int r = 0;
2775 int i;
2776 uint32_t smu_version;
2777
2778 if (adev->asic_type >= CHIP_VEGA10) {
2779 for (i = 0; i < adev->num_ip_blocks; i++) {
2780 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2781 continue;
2782
2783 if (!amdgpu_ip_member_of_hwini(adev,
2784 AMD_IP_BLOCK_TYPE_PSP))
2785 break;
2786
2787 if (!adev->ip_blocks[i].status.sw)
2788 continue;
2789
2790 /* no need to do the fw loading again if already done */
2791 if (adev->ip_blocks[i].status.hw)
2792 break;
2793
2794 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2795 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2796 if (r)
2797 return r;
2798 } else {
2799 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2800 if (r) {
2801 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2802 adev->ip_blocks[i].version->funcs->name, r);
2803 return r;
2804 }
2805 adev->ip_blocks[i].status.hw = true;
2806 }
2807 break;
2808 }
2809 }
2810
2811 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2812 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2813
2814 return r;
2815}
2816
2817static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2818{
2819 long timeout;
2820 int r, i;
2821
2822 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2823 struct amdgpu_ring *ring = adev->rings[i];
2824
2825 /* No need to setup the GPU scheduler for rings that don't need it */
2826 if (!ring || ring->no_scheduler)
2827 continue;
2828
2829 switch (ring->funcs->type) {
2830 case AMDGPU_RING_TYPE_GFX:
2831 timeout = adev->gfx_timeout;
2832 break;
2833 case AMDGPU_RING_TYPE_COMPUTE:
2834 timeout = adev->compute_timeout;
2835 break;
2836 case AMDGPU_RING_TYPE_SDMA:
2837 timeout = adev->sdma_timeout;
2838 break;
2839 default:
2840 timeout = adev->video_timeout;
2841 break;
2842 }
2843
2844 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2845 DRM_SCHED_PRIORITY_COUNT,
2846 ring->num_hw_submission, 0,
2847 timeout, adev->reset_domain->wq,
2848 ring->sched_score, ring->name,
2849 adev->dev);
2850 if (r) {
2851 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2852 ring->name);
2853 return r;
2854 }
2855 r = amdgpu_uvd_entity_init(adev, ring);
2856 if (r) {
2857 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2858 ring->name);
2859 return r;
2860 }
2861 r = amdgpu_vce_entity_init(adev, ring);
2862 if (r) {
2863 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2864 ring->name);
2865 return r;
2866 }
2867 }
2868
2869 amdgpu_xcp_update_partition_sched_list(adev);
2870
2871 return 0;
2872}
2873
2874
2875/**
2876 * amdgpu_device_ip_init - run init for hardware IPs
2877 *
2878 * @adev: amdgpu_device pointer
2879 *
2880 * Main initialization pass for hardware IPs. The list of all the hardware
2881 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2882 * are run. sw_init initializes the software state associated with each IP
2883 * and hw_init initializes the hardware associated with each IP.
2884 * Returns 0 on success, negative error code on failure.
2885 */
2886static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2887{
2888 bool init_badpage;
2889 int i, r;
2890
2891 r = amdgpu_ras_init(adev);
2892 if (r)
2893 return r;
2894
2895 for (i = 0; i < adev->num_ip_blocks; i++) {
2896 if (!adev->ip_blocks[i].status.valid)
2897 continue;
2898 if (adev->ip_blocks[i].version->funcs->sw_init) {
2899 r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2900 if (r) {
2901 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2902 adev->ip_blocks[i].version->funcs->name, r);
2903 goto init_failed;
2904 }
2905 }
2906 adev->ip_blocks[i].status.sw = true;
2907
2908 if (!amdgpu_ip_member_of_hwini(
2909 adev, adev->ip_blocks[i].version->type))
2910 continue;
2911
2912 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2913 /* need to do common hw init early so everything is set up for gmc */
2914 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2915 if (r) {
2916 DRM_ERROR("hw_init %d failed %d\n", i, r);
2917 goto init_failed;
2918 }
2919 adev->ip_blocks[i].status.hw = true;
2920 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2921 /* need to do gmc hw init early so we can allocate gpu mem */
2922 /* Try to reserve bad pages early */
2923 if (amdgpu_sriov_vf(adev))
2924 amdgpu_virt_exchange_data(adev);
2925
2926 r = amdgpu_device_mem_scratch_init(adev);
2927 if (r) {
2928 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2929 goto init_failed;
2930 }
2931 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2932 if (r) {
2933 DRM_ERROR("hw_init %d failed %d\n", i, r);
2934 goto init_failed;
2935 }
2936 r = amdgpu_device_wb_init(adev);
2937 if (r) {
2938 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2939 goto init_failed;
2940 }
2941 adev->ip_blocks[i].status.hw = true;
2942
2943 /* right after GMC hw init, we create CSA */
2944 if (adev->gfx.mcbp) {
2945 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2946 AMDGPU_GEM_DOMAIN_VRAM |
2947 AMDGPU_GEM_DOMAIN_GTT,
2948 AMDGPU_CSA_SIZE);
2949 if (r) {
2950 DRM_ERROR("allocate CSA failed %d\n", r);
2951 goto init_failed;
2952 }
2953 }
2954
2955 r = amdgpu_seq64_init(adev);
2956 if (r) {
2957 DRM_ERROR("allocate seq64 failed %d\n", r);
2958 goto init_failed;
2959 }
2960 }
2961 }
2962
2963 if (amdgpu_sriov_vf(adev))
2964 amdgpu_virt_init_data_exchange(adev);
2965
2966 r = amdgpu_ib_pool_init(adev);
2967 if (r) {
2968 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2969 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2970 goto init_failed;
2971 }
2972
2973 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2974 if (r)
2975 goto init_failed;
2976
2977 r = amdgpu_device_ip_hw_init_phase1(adev);
2978 if (r)
2979 goto init_failed;
2980
2981 r = amdgpu_device_fw_loading(adev);
2982 if (r)
2983 goto init_failed;
2984
2985 r = amdgpu_device_ip_hw_init_phase2(adev);
2986 if (r)
2987 goto init_failed;
2988
2989 /*
2990 * retired pages will be loaded from eeprom and reserved here,
2991 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2992 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2993 * for I2C communication, which is only true at this point.
2994 *
2995 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2996 * the failure from a bad gpu situation and stop the amdgpu init process
2997 * accordingly. For other failure cases, it will still release all
2998 * the resources and print an error message, rather than returning a
2999 * negative value to the upper level.
3000 *
3001 * Note: theoretically, this should be called before all vram allocations
3002 * to keep retired pages from being allocated and reused
3003 */
3004 init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3005 r = amdgpu_ras_recovery_init(adev, init_badpage);
3006 if (r)
3007 goto init_failed;
3008
3009 /*
3010 * In case of XGMI, grab an extra reference on the reset domain for this device
3011 */
3012 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3013 if (amdgpu_xgmi_add_device(adev) == 0) {
3014 if (!amdgpu_sriov_vf(adev)) {
3015 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3016
3017 if (WARN_ON(!hive)) {
3018 r = -ENOENT;
3019 goto init_failed;
3020 }
3021
3022 if (!hive->reset_domain ||
3023 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
3024 r = -ENOENT;
3025 amdgpu_put_xgmi_hive(hive);
3026 goto init_failed;
3027 }
3028
3029 /* Drop the early temporary reset domain we created for device */
3030 amdgpu_reset_put_reset_domain(adev->reset_domain);
3031 adev->reset_domain = hive->reset_domain;
3032 amdgpu_put_xgmi_hive(hive);
3033 }
3034 }
3035 }
3036
3037 r = amdgpu_device_init_schedulers(adev);
3038 if (r)
3039 goto init_failed;
3040
3041 if (adev->mman.buffer_funcs_ring->sched.ready)
3042 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3043
3044 /* Don't init kfd if the whole hive needs to be reset during init */
3045 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
3046 kgd2kfd_init_zone_device(adev);
3047 amdgpu_amdkfd_device_init(adev);
3048 }
3049
3050 amdgpu_fru_get_product_info(adev);
3051
3052init_failed:
3053
3054 return r;
3055}
3056
3057/**
3058 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3059 *
3060 * @adev: amdgpu_device pointer
3061 *
3062 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
3063 * this function before a GPU reset. If the value is retained after a
3064 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3065 */
3066static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3067{
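	/*
	 * Snapshot the first AMDGPU_RESET_MAGIC_NUM bytes at the GART CPU
	 * address; amdgpu_device_check_vram_lost() compares against this
	 * copy after a reset to detect VRAM loss.
	 */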
3068 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3069}
3070
3071/**
3072 * amdgpu_device_check_vram_lost - check if vram is valid
3073 *
3074 * @adev: amdgpu_device pointer
3075 *
3076 * Checks the reset magic value written to the gart pointer in VRAM.
3077 * The driver calls this after a GPU reset to see if the contents of
3078 * VRAM is lost or not.
3079 * returns true if vram is lost, false if not.
3080 */
3081static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3082{
3083 if (memcmp(adev->gart.ptr, adev->reset_magic,
3084 AMDGPU_RESET_MAGIC_NUM))
3085 return true;
3086
3087 if (!amdgpu_in_reset(adev))
3088 return false;
3089
3090 /*
3091 * For all ASICs with baco/mode1 reset, the VRAM is
3092 * always assumed to be lost.
3093 */
3094 switch (amdgpu_asic_reset_method(adev)) {
3095 case AMD_RESET_METHOD_BACO:
3096 case AMD_RESET_METHOD_MODE1:
3097 return true;
3098 default:
3099 return false;
3100 }
3101}
3102
3103/**
3104 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3105 *
3106 * @adev: amdgpu_device pointer
3107 * @state: clockgating state (gate or ungate)
3108 *
3109 * The list of all the hardware IPs that make up the asic is walked and the
3110 * set_clockgating_state callbacks are run.
3111 * The late initialization pass enables clockgating for hardware IPs;
3112 * the fini or suspend pass disables clockgating for hardware IPs.
3113 * Returns 0 on success, negative error code on failure.
3114 */
3115
3116int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3117 enum amd_clockgating_state state)
3118{
3119 int i, j, r;
3120
3121 if (amdgpu_emu_mode == 1)
3122 return 0;
3123
3124 for (j = 0; j < adev->num_ip_blocks; j++) {
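		/* gate in IP init order, ungate in reverse (fini/suspend) order */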
3125 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3126 if (!adev->ip_blocks[i].status.late_initialized)
3127 continue;
3128 /* skip CG for GFX, SDMA on S0ix */
3129 if (adev->in_s0ix &&
3130 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3131 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3132 continue;
3133 /* skip CG for VCE/UVD, it's handled specially */
3134 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3135 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3136 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3137 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3138 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3139 /* enable clockgating to save power */
3140 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3141 state);
3142 if (r) {
3143 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3144 adev->ip_blocks[i].version->funcs->name, r);
3145 return r;
3146 }
3147 }
3148 }
3149
3150 return 0;
3151}
3152
3153int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3154 enum amd_powergating_state state)
3155{
3156 int i, j, r;
3157
3158 if (amdgpu_emu_mode == 1)
3159 return 0;
3160
3161 for (j = 0; j < adev->num_ip_blocks; j++) {
3162 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3163 if (!adev->ip_blocks[i].status.late_initialized)
3164 continue;
3165 /* skip PG for GFX, SDMA on S0ix */
3166 if (adev->in_s0ix &&
3167 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3168 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3169 continue;
3170 /* skip PG for VCE/UVD, it's handled specially */
3171 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3172 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3173 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3174 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3175 adev->ip_blocks[i].version->funcs->set_powergating_state) {
3176 /* enable powergating to save power */
3177 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3178 state);
3179 if (r) {
3180 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3181 adev->ip_blocks[i].version->funcs->name, r);
3182 return r;
3183 }
3184 }
3185 }
3186 return 0;
3187}
3188
3189static int amdgpu_device_enable_mgpu_fan_boost(void)
3190{
3191 struct amdgpu_gpu_instance *gpu_ins;
3192 struct amdgpu_device *adev;
3193 int i, ret = 0;
3194
3195 mutex_lock(&mgpu_info.mutex);
3196
3197 /*
3198 * MGPU fan boost feature should be enabled
3199 * only when there are two or more dGPUs in
3200 * the system
3201 */
3202 if (mgpu_info.num_dgpu < 2)
3203 goto out;
3204
3205 for (i = 0; i < mgpu_info.num_dgpu; i++) {
3206 gpu_ins = &(mgpu_info.gpu_ins[i]);
3207 adev = gpu_ins->adev;
3208 if (!(adev->flags & AMD_IS_APU) &&
3209 !gpu_ins->mgpu_fan_enabled) {
3210 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3211 if (ret)
3212 break;
3213
3214 gpu_ins->mgpu_fan_enabled = 1;
3215 }
3216 }
3217
3218out:
3219 mutex_unlock(&mgpu_info.mutex);
3220
3221 return ret;
3222}
3223
3224/**
3225 * amdgpu_device_ip_late_init - run late init for hardware IPs
3226 *
3227 * @adev: amdgpu_device pointer
3228 *
3229 * Late initialization pass for hardware IPs. The list of all the hardware
3230 * IPs that make up the asic is walked and the late_init callbacks are run.
3231 * late_init covers any special initialization that an IP requires
3232 * after all of the IPs have been initialized or something that needs to happen
3233 * late in the init process.
3234 * Returns 0 on success, negative error code on failure.
3235 */
3236static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3237{
3238 struct amdgpu_gpu_instance *gpu_instance;
3239 int i = 0, r;
3240
3241 for (i = 0; i < adev->num_ip_blocks; i++) {
3242 if (!adev->ip_blocks[i].status.hw)
3243 continue;
3244 if (adev->ip_blocks[i].version->funcs->late_init) {
3245 r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3246 if (r) {
3247 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3248 adev->ip_blocks[i].version->funcs->name, r);
3249 return r;
3250 }
3251 }
3252 adev->ip_blocks[i].status.late_initialized = true;
3253 }
3254
3255 r = amdgpu_ras_late_init(adev);
3256 if (r) {
3257 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3258 return r;
3259 }
3260
3261 if (!amdgpu_reset_in_recovery(adev))
3262 amdgpu_ras_set_error_query_ready(adev, true);
3263
3264 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3265 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3266
3267 amdgpu_device_fill_reset_magic(adev);
3268
3269 r = amdgpu_device_enable_mgpu_fan_boost();
3270 if (r)
3271 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3272
3273 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3274 if (amdgpu_passthrough(adev) &&
3275 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3276 adev->asic_type == CHIP_ALDEBARAN))
3277 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3278
3279 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3280 mutex_lock(&mgpu_info.mutex);
3281
3282 /*
3283 * Reset device p-state to low as this was booted with high.
3284 *
3285 * This should be performed only after all devices from the same
3286 * hive get initialized.
3287 *
3288 * However, the number of devices in the hive is not known in advance;
3289 * it is counted one by one as the devices are initialized.
3290 *
3291 * So, we wait for all XGMI interlinked devices initialized.
3292 * This may bring some delays as those devices may come from
3293 * different hives. But that should be OK.
3294 */
3295 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3296 for (i = 0; i < mgpu_info.num_gpu; i++) {
3297 gpu_instance = &(mgpu_info.gpu_ins[i]);
3298 if (gpu_instance->adev->flags & AMD_IS_APU)
3299 continue;
3300
3301 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3302 AMDGPU_XGMI_PSTATE_MIN);
3303 if (r) {
3304 DRM_ERROR("pstate setting failed (%d).\n", r);
3305 break;
3306 }
3307 }
3308 }
3309
3310 mutex_unlock(&mgpu_info.mutex);
3311 }
3312
3313 return 0;
3314}
3315
3316static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3317{
3318 int r;
3319
3320 if (!ip_block->version->funcs->hw_fini) {
3321 DRM_ERROR("hw_fini of IP block <%s> not defined\n",
3322 ip_block->version->funcs->name);
3323 } else {
3324 r = ip_block->version->funcs->hw_fini(ip_block);
3325 /* XXX handle errors */
3326 if (r) {
3327 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3328 ip_block->version->funcs->name, r);
3329 }
3330 }
3331
3332 ip_block->status.hw = false;
3333}
3334
3335/**
3336 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3337 *
3338 * @adev: amdgpu_device pointer
3339 *
3340 * For ASICs that need to disable the SMC first
3341 */
3342static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3343{
3344 int i;
3345
3346 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3347 return;
3348
3349 for (i = 0; i < adev->num_ip_blocks; i++) {
3350 if (!adev->ip_blocks[i].status.hw)
3351 continue;
3352 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3353 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3354 break;
3355 }
3356 }
3357}
3358
3359static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3360{
3361 int i, r;
3362
3363 for (i = 0; i < adev->num_ip_blocks; i++) {
3364 if (!adev->ip_blocks[i].version->funcs->early_fini)
3365 continue;
3366
3367 r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3368 if (r) {
3369 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3370 adev->ip_blocks[i].version->funcs->name, r);
3371 }
3372 }
3373
3374 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3375 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3376
3377 amdgpu_amdkfd_suspend(adev, false);
3378
3379 /* Workaround for ASICs that need to disable the SMC first */
3380 amdgpu_device_smu_fini_early(adev);
3381
3382 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3383 if (!adev->ip_blocks[i].status.hw)
3384 continue;
3385
3386 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3387 }
3388
3389 if (amdgpu_sriov_vf(adev)) {
3390 if (amdgpu_virt_release_full_gpu(adev, false))
3391 DRM_ERROR("failed to release exclusive mode on fini\n");
3392 }
3393
3394 return 0;
3395}
3396
3397/**
3398 * amdgpu_device_ip_fini - run fini for hardware IPs
3399 *
3400 * @adev: amdgpu_device pointer
3401 *
3402 * Main teardown pass for hardware IPs. The list of all the hardware
3403 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3404 * are run. hw_fini tears down the hardware associated with each IP
3405 * and sw_fini tears down any software state associated with each IP.
3406 * Returns 0 on success, negative error code on failure.
3407 */
3408static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3409{
3410 int i, r;
3411
3412 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3413 amdgpu_virt_release_ras_err_handler_data(adev);
3414
3415 if (adev->gmc.xgmi.num_physical_nodes > 1)
3416 amdgpu_xgmi_remove_device(adev);
3417
3418 amdgpu_amdkfd_device_fini_sw(adev);
3419
3420 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3421 if (!adev->ip_blocks[i].status.sw)
3422 continue;
3423
3424 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3425 amdgpu_ucode_free_bo(adev);
3426 amdgpu_free_static_csa(&adev->virt.csa_obj);
3427 amdgpu_device_wb_fini(adev);
3428 amdgpu_device_mem_scratch_fini(adev);
3429 amdgpu_ib_pool_fini(adev);
3430 amdgpu_seq64_fini(adev);
3431 }
3432 if (adev->ip_blocks[i].version->funcs->sw_fini) {
3433 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3434 /* XXX handle errors */
3435 if (r) {
3436 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3437 adev->ip_blocks[i].version->funcs->name, r);
3438 }
3439 }
3440 adev->ip_blocks[i].status.sw = false;
3441 adev->ip_blocks[i].status.valid = false;
3442 }
3443
3444 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3445 if (!adev->ip_blocks[i].status.late_initialized)
3446 continue;
3447 if (adev->ip_blocks[i].version->funcs->late_fini)
3448 adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3449 adev->ip_blocks[i].status.late_initialized = false;
3450 }
3451
3452 amdgpu_ras_fini(adev);
3453
3454 return 0;
3455}
3456
3457/**
3458 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3459 *
3460 * @work: work_struct.
3461 */
3462static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3463{
3464 struct amdgpu_device *adev =
3465 container_of(work, struct amdgpu_device, delayed_init_work.work);
3466 int r;
3467
3468 r = amdgpu_ib_ring_tests(adev);
3469 if (r)
3470 DRM_ERROR("ib ring test failed (%d).\n", r);
3471}
3472
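/*
 * Delayed work handler that lets the SMU actually enter GFXOFF once all
 * requests to keep the GFX block powered up (gfx_off_req_count) have been
 * dropped.
 */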
3473static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3474{
3475 struct amdgpu_device *adev =
3476 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3477
3478 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3479 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3480
3481 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3482 adev->gfx.gfx_off_state = true;
3483}
3484
3485/**
3486 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3487 *
3488 * @adev: amdgpu_device pointer
3489 *
3490 * Main suspend function for hardware IPs. The list of all the hardware
3491 * IPs that make up the asic is walked, clockgating is disabled and the
3492 * suspend callbacks are run. suspend puts the hardware and software state
3493 * in each IP into a state suitable for suspend.
3494 * Returns 0 on success, negative error code on failure.
3495 */
3496static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3497{
3498 int i, r;
3499
3500 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3501 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3502
3503 /*
3504 * Per PMFW team's suggestion, driver needs to handle gfxoff
3505 * and df cstate features disablement for gpu reset (e.g. Mode1Reset)
3506 * scenario. Add the missing df cstate disablement here.
3507 */
3508 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3509 dev_warn(adev->dev, "Failed to disallow df cstate");
3510
3511 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3512 if (!adev->ip_blocks[i].status.valid)
3513 continue;
3514
3515 /* displays are handled separately */
3516 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3517 continue;
3518
3519 /* XXX handle errors */
3520 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3521 if (r)
3522 return r;
3523 }
3524
3525 return 0;
3526}
3527
3528/**
3529 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3530 *
3531 * @adev: amdgpu_device pointer
3532 *
3533 * Main suspend function for hardware IPs. The list of all the hardware
3534 * IPs that make up the asic is walked, clockgating is disabled and the
3535 * suspend callbacks are run. suspend puts the hardware and software state
3536 * in each IP into a state suitable for suspend.
3537 * Returns 0 on success, negative error code on failure.
3538 */
3539static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3540{
3541 int i, r;
3542
3543 if (adev->in_s0ix)
3544 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3545
3546 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3547 if (!adev->ip_blocks[i].status.valid)
3548 continue;
3549 /* displays are handled in phase1 */
3550 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3551 continue;
3552 /* PSP lost connection when err_event_athub occurs */
3553 if (amdgpu_ras_intr_triggered() &&
3554 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3555 adev->ip_blocks[i].status.hw = false;
3556 continue;
3557 }
3558
3559 /* skip unnecessary suspend if we have not initialized them yet */
3560 if (!amdgpu_ip_member_of_hwini(
3561 adev, adev->ip_blocks[i].version->type))
3562 continue;
3563
3564 /* skip suspend of gfx/mes and psp for S0ix
3565 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3566 * like at runtime. PSP is also part of the always on hardware
3567 * so no need to suspend it.
3568 */
3569 if (adev->in_s0ix &&
3570 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3571 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3572 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3573 continue;
3574
3575 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3576 if (adev->in_s0ix &&
3577 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3578 IP_VERSION(5, 0, 0)) &&
3579 (adev->ip_blocks[i].version->type ==
3580 AMD_IP_BLOCK_TYPE_SDMA))
3581 continue;
3582
3583 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3584 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3585 * from this location and RLC Autoload automatically also gets loaded
3586 * from here based on PMFW -> PSP message during re-init sequence.
3587 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3588 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3589 */
3590 if (amdgpu_in_reset(adev) &&
3591 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3592 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3593 continue;
3594
3595 /* XXX handle errors */
3596 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3597 adev->ip_blocks[i].status.hw = false;
3598
3599 /* handle putting the SMC in the appropriate state */
3600 if (!amdgpu_sriov_vf(adev)) {
3601 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3602 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3603 if (r) {
3604 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3605 adev->mp1_state, r);
3606 return r;
3607 }
3608 }
3609 }
3610 }
3611
3612 return 0;
3613}
3614
3615/**
3616 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3617 *
3618 * @adev: amdgpu_device pointer
3619 *
3620 * Main suspend function for hardware IPs. The list of all the hardware
3621 * IPs that make up the asic is walked, clockgating is disabled and the
3622 * suspend callbacks are run. suspend puts the hardware and software state
3623 * in each IP into a state suitable for suspend.
3624 * Returns 0 on success, negative error code on failure.
3625 */
3626int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3627{
3628 int r;
3629
3630 if (amdgpu_sriov_vf(adev)) {
3631 amdgpu_virt_fini_data_exchange(adev);
3632 amdgpu_virt_request_full_gpu(adev, false);
3633 }
3634
3635 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3636
3637 r = amdgpu_device_ip_suspend_phase1(adev);
3638 if (r)
3639 return r;
3640 r = amdgpu_device_ip_suspend_phase2(adev);
3641
3642 if (amdgpu_sriov_vf(adev))
3643 amdgpu_virt_release_full_gpu(adev, false);
3644
3645 return r;
3646}
3647
3648static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3649{
3650 int i, r;
3651
3652 static enum amd_ip_block_type ip_order[] = {
3653 AMD_IP_BLOCK_TYPE_COMMON,
3654 AMD_IP_BLOCK_TYPE_GMC,
3655 AMD_IP_BLOCK_TYPE_PSP,
3656 AMD_IP_BLOCK_TYPE_IH,
3657 };
3658
3659 for (i = 0; i < adev->num_ip_blocks; i++) {
3660 int j;
3661 struct amdgpu_ip_block *block;
3662
3663 block = &adev->ip_blocks[i];
3664 block->status.hw = false;
3665
3666 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3667
3668 if (block->version->type != ip_order[j] ||
3669 !block->status.valid)
3670 continue;
3671
3672 r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3673 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3674 if (r)
3675 return r;
3676 block->status.hw = true;
3677 }
3678 }
3679
3680 return 0;
3681}
3682
3683static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3684{
3685 int i, r;
3686
3687 static enum amd_ip_block_type ip_order[] = {
3688 AMD_IP_BLOCK_TYPE_SMC,
3689 AMD_IP_BLOCK_TYPE_DCE,
3690 AMD_IP_BLOCK_TYPE_GFX,
3691 AMD_IP_BLOCK_TYPE_SDMA,
3692 AMD_IP_BLOCK_TYPE_MES,
3693 AMD_IP_BLOCK_TYPE_UVD,
3694 AMD_IP_BLOCK_TYPE_VCE,
3695 AMD_IP_BLOCK_TYPE_VCN,
3696 AMD_IP_BLOCK_TYPE_JPEG
3697 };
3698
3699 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3700 int j;
3701 struct amdgpu_ip_block *block;
3702
3703 for (j = 0; j < adev->num_ip_blocks; j++) {
3704 block = &adev->ip_blocks[j];
3705
3706 if (block->version->type != ip_order[i] ||
3707 !block->status.valid ||
3708 block->status.hw)
3709 continue;
3710
3711 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3712 r = amdgpu_ip_block_resume(block);
3713 if (r)
3714 return r;
3715 } else {
3716 r = block->version->funcs->hw_init(block);
3717 if (r) {
3718 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
3719 block->version->funcs->name, r);
3720 return r;
3721 }
3722 block->status.hw = true;
3723 }
3724 }
3725 }
3726
3727 return 0;
3728}
3729
3730/**
3731 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3732 *
3733 * @adev: amdgpu_device pointer
3734 *
3735 * First resume function for hardware IPs. The list of all the hardware
3736 * IPs that make up the asic is walked and the resume callbacks are run for
3737 * COMMON, GMC, IH and, for SR-IOV, PSP. resume puts the hardware into a functional state
3738 * after a suspend and updates the software state as necessary. This
3739 * function is also used for restoring the GPU after a GPU reset.
3740 * Returns 0 on success, negative error code on failure.
3741 */
3742static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3743{
3744 int i, r;
3745
3746 for (i = 0; i < adev->num_ip_blocks; i++) {
3747 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3748 continue;
3749 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3750 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3751 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3752 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3753
3754 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3755 if (r)
3756 return r;
3757 }
3758 }
3759
3760 return 0;
3761}
3762
3763/**
3764 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3765 *
3766 * @adev: amdgpu_device pointer
3767 *
3768  * Second resume function for hardware IPs. The list of all the hardware
3769  * IPs that make up the asic is walked and the resume callbacks are run for
3770  * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
3771 * functional state after a suspend and updates the software state as
3772 * necessary. This function is also used for restoring the GPU after a GPU
3773 * reset.
3774 * Returns 0 on success, negative error code on failure.
3775 */
3776static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3777{
3778 int i, r;
3779
3780 for (i = 0; i < adev->num_ip_blocks; i++) {
3781 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3782 continue;
3783 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3784 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3785 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3786 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3787 continue;
3788 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3789 if (r)
3790 return r;
3791 }
3792
3793 return 0;
3794}
3795
3796/**
3797 * amdgpu_device_ip_resume - run resume for hardware IPs
3798 *
3799 * @adev: amdgpu_device pointer
3800 *
3801 * Main resume function for hardware IPs. The hardware IPs
3802 * are split into two resume functions because they are
3803 * also used in recovering from a GPU reset and some additional
3804  * steps need to be taken between them. In this case (S3/S4) they are
3805 * run sequentially.
3806 * Returns 0 on success, negative error code on failure.
3807 */
3808static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3809{
3810 int r;
3811
3812 r = amdgpu_device_ip_resume_phase1(adev);
3813 if (r)
3814 return r;
3815
3816 r = amdgpu_device_fw_loading(adev);
3817 if (r)
3818 return r;
3819
3820 r = amdgpu_device_ip_resume_phase2(adev);
3821
3822 if (adev->mman.buffer_funcs_ring->sched.ready)
3823 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3824
3825 return r;
3826}
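
/*
 * Condensed view of the resume ordering enforced above (illustrative
 * only, mirroring the calls in amdgpu_device_ip_resume()):
 *
 *	amdgpu_device_ip_resume_phase1(adev);	// COMMON, GMC, IH (+ PSP on VFs)
 *	amdgpu_device_fw_loading(adev);		// firmware loading must complete between the phases
 *	amdgpu_device_ip_resume_phase2(adev);	// all remaining blocks (GFX, SDMA, VCN, ...)
 *	amdgpu_ttm_set_buffer_funcs_status(adev, true);	// once the DMA ring is ready again
 */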
3827
3828/**
3829 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3830 *
3831 * @adev: amdgpu_device pointer
3832 *
3833 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3834 */
3835static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3836{
3837 if (amdgpu_sriov_vf(adev)) {
3838 if (adev->is_atom_fw) {
3839 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3840 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3841 } else {
3842 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3843 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3844 }
3845
3846 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3847 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3848 }
3849}
3850
3851/**
3852 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3853 *
3854 * @asic_type: AMD asic type
3855 *
3856  * Check if there is DC (new modesetting infrastructure) support for an asic.
3857 * returns true if DC has support, false if not.
3858 */
3859bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3860{
3861 switch (asic_type) {
3862#ifdef CONFIG_DRM_AMDGPU_SI
3863 case CHIP_HAINAN:
3864#endif
3865 case CHIP_TOPAZ:
3866 /* chips with no display hardware */
3867 return false;
3868#if defined(CONFIG_DRM_AMD_DC)
3869 case CHIP_TAHITI:
3870 case CHIP_PITCAIRN:
3871 case CHIP_VERDE:
3872 case CHIP_OLAND:
3873 /*
3874 * We have systems in the wild with these ASICs that require
3875 * LVDS and VGA support which is not supported with DC.
3876 *
3877 * Fallback to the non-DC driver here by default so as not to
3878 * cause regressions.
3879 */
3880#if defined(CONFIG_DRM_AMD_DC_SI)
3881 return amdgpu_dc > 0;
3882#else
3883 return false;
3884#endif
3885 case CHIP_BONAIRE:
3886 case CHIP_KAVERI:
3887 case CHIP_KABINI:
3888 case CHIP_MULLINS:
3889 /*
3890 * We have systems in the wild with these ASICs that require
3891 * VGA support which is not supported with DC.
3892 *
3893 * Fallback to the non-DC driver here by default so as not to
3894 * cause regressions.
3895 */
3896 return amdgpu_dc > 0;
3897 default:
3898 return amdgpu_dc != 0;
3899#else
3900 default:
3901 if (amdgpu_dc > 0)
3902 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3903 return false;
3904#endif
3905 }
3906}
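
/*
 * Summary (illustrative, derived from the switch above) of how the
 * amdgpu_dc module parameter feeds this check when DC is compiled in;
 * without CONFIG_DRM_AMD_DC the result is always false:
 *
 *	amdgpu.dc=-1 (default): DC on ASICs with full DC support; the legacy
 *	                        SI/CIK parts listed above stay on the non-DC path
 *	amdgpu.dc=0:            DC disabled
 *	amdgpu.dc=1:            DC forced on, including the legacy parts
 *	                        (SI additionally needs CONFIG_DRM_AMD_DC_SI)
 */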
3907
3908/**
3909 * amdgpu_device_has_dc_support - check if dc is supported
3910 *
3911 * @adev: amdgpu_device pointer
3912 *
3913 * Returns true for supported, false for not supported
3914 */
3915bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3916{
3917 if (adev->enable_virtual_display ||
3918 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3919 return false;
3920
3921 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3922}
3923
3924static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3925{
3926 struct amdgpu_device *adev =
3927 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3928 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3929
3930 /* It's a bug to not have a hive within this function */
3931 if (WARN_ON(!hive))
3932 return;
3933
3934 /*
3935 * Use task barrier to synchronize all xgmi reset works across the
3936 * hive. task_barrier_enter and task_barrier_exit will block
3937 * until all the threads running the xgmi reset works reach
3938 * those points. task_barrier_full will do both blocks.
3939 */
3940 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3941
3942 task_barrier_enter(&hive->tb);
3943 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3944
3945 if (adev->asic_reset_res)
3946 goto fail;
3947
3948 task_barrier_exit(&hive->tb);
3949 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3950
3951 if (adev->asic_reset_res)
3952 goto fail;
3953
3954 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3955 } else {
3956
3957 task_barrier_full(&hive->tb);
3958 adev->asic_reset_res = amdgpu_asic_reset(adev);
3959 }
3960
3961fail:
3962 if (adev->asic_reset_res)
3963 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3964 adev->asic_reset_res, adev_to_drm(adev)->unique);
3965 amdgpu_put_xgmi_hive(hive);
3966}
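
/*
 * A condensed sketch of the task barrier usage above: each node in the
 * hive runs this work in parallel, and the barrier keeps the BACO
 * enter/exit steps lock-stepped across all nodes.
 *
 *	task_barrier_enter(&hive->tb);	// wait until every node reaches here
 *	...BACO enter on this node...
 *	task_barrier_exit(&hive->tb);	// wait again before anyone leaves BACO
 *	...BACO exit on this node...
 *
 * task_barrier_full() performs both waits back to back for the
 * non-BACO (full ASIC reset) path.
 */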
3967
3968static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3969{
3970 char *input = amdgpu_lockup_timeout;
3971 char *timeout_setting = NULL;
3972 int index = 0;
3973 long timeout;
3974 int ret = 0;
3975
3976 /*
3977 	 * By default the timeout for non-compute jobs is 10000 ms and
3978 	 * 60000 ms for compute jobs.
3979 	 * In SR-IOV, the compute timeout defaults to 60000 ms only in
3980 	 * pp_one_vf mode; otherwise it is 10000 ms.
3981 */
3982 adev->gfx_timeout = msecs_to_jiffies(10000);
3983 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3984 if (amdgpu_sriov_vf(adev))
3985 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3986 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3987 else
3988 adev->compute_timeout = msecs_to_jiffies(60000);
3989
3990 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3991 while ((timeout_setting = strsep(&input, ",")) &&
3992 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3993 ret = kstrtol(timeout_setting, 0, &timeout);
3994 if (ret)
3995 return ret;
3996
3997 if (timeout == 0) {
3998 index++;
3999 continue;
4000 } else if (timeout < 0) {
4001 timeout = MAX_SCHEDULE_TIMEOUT;
4002 dev_warn(adev->dev, "lockup timeout disabled");
4003 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
4004 } else {
4005 timeout = msecs_to_jiffies(timeout);
4006 }
4007
4008 switch (index++) {
4009 case 0:
4010 adev->gfx_timeout = timeout;
4011 break;
4012 case 1:
4013 adev->compute_timeout = timeout;
4014 break;
4015 case 2:
4016 adev->sdma_timeout = timeout;
4017 break;
4018 case 3:
4019 adev->video_timeout = timeout;
4020 break;
4021 default:
4022 break;
4023 }
4024 }
4025 /*
4026 * There is only one value specified and
4027 * it should apply to all non-compute jobs.
4028 */
4029 if (index == 1) {
4030 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
4031 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
4032 adev->compute_timeout = adev->gfx_timeout;
4033 }
4034 }
4035
4036 return ret;
4037}
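
/*
 * Example (illustrative) of the comma-separated lockup_timeout parameter
 * parsed above; the values map to gfx, compute, sdma, video in that
 * order (milliseconds, 0 keeps the default, a negative value disables
 * the timeout):
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * A single value applies to all non-compute engines; under SR-IOV or
 * passthrough it is applied to compute as well.
 */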
4038
4039/**
4040 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4041 *
4042 * @adev: amdgpu_device pointer
4043 *
4044  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough (identity) mode.
4045 */
4046static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4047{
4048 struct iommu_domain *domain;
4049
4050 domain = iommu_get_domain_for_dev(adev->dev);
4051 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4052 adev->ram_is_direct_mapped = true;
4053}
4054
4055#if defined(CONFIG_HSA_AMD_P2P)
4056/**
4057 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4058 *
4059 * @adev: amdgpu_device pointer
4060 *
4061  * Returns true if the IOMMU remaps DMA (and thus BAR) addresses for the device.
4062 */
4063static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4064{
4065 struct iommu_domain *domain;
4066
4067 domain = iommu_get_domain_for_dev(adev->dev);
4068 if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4069 domain->type == IOMMU_DOMAIN_DMA_FQ))
4070 return true;
4071
4072 return false;
4073}
4074#endif
4075
4076static const struct attribute *amdgpu_dev_attributes[] = {
4077 &dev_attr_pcie_replay_count.attr,
4078 NULL
4079};
4080
4081static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4082{
4083 if (amdgpu_mcbp == 1)
4084 adev->gfx.mcbp = true;
4085 else if (amdgpu_mcbp == 0)
4086 adev->gfx.mcbp = false;
4087
4088 if (amdgpu_sriov_vf(adev))
4089 adev->gfx.mcbp = true;
4090
4091 if (adev->gfx.mcbp)
4092 DRM_INFO("MCBP is enabled\n");
4093}
4094
4095/**
4096 * amdgpu_device_init - initialize the driver
4097 *
4098 * @adev: amdgpu_device pointer
4099 * @flags: driver flags
4100 *
4101 * Initializes the driver info and hw (all asics).
4102 * Returns 0 for success or an error on failure.
4103 * Called at driver startup.
4104 */
4105int amdgpu_device_init(struct amdgpu_device *adev,
4106 uint32_t flags)
4107{
4108 struct drm_device *ddev = adev_to_drm(adev);
4109 struct pci_dev *pdev = adev->pdev;
4110 int r, i;
4111 bool px = false;
4112 u32 max_MBps;
4113 int tmp;
4114
4115 adev->shutdown = false;
4116 adev->flags = flags;
4117
4118 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4119 adev->asic_type = amdgpu_force_asic_type;
4120 else
4121 adev->asic_type = flags & AMD_ASIC_MASK;
4122
4123 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4124 if (amdgpu_emu_mode == 1)
4125 adev->usec_timeout *= 10;
4126 adev->gmc.gart_size = 512 * 1024 * 1024;
4127 adev->accel_working = false;
4128 adev->num_rings = 0;
4129 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4130 adev->mman.buffer_funcs = NULL;
4131 adev->mman.buffer_funcs_ring = NULL;
4132 adev->vm_manager.vm_pte_funcs = NULL;
4133 adev->vm_manager.vm_pte_num_scheds = 0;
4134 adev->gmc.gmc_funcs = NULL;
4135 adev->harvest_ip_mask = 0x0;
4136 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4137 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4138
4139 adev->smc_rreg = &amdgpu_invalid_rreg;
4140 adev->smc_wreg = &amdgpu_invalid_wreg;
4141 adev->pcie_rreg = &amdgpu_invalid_rreg;
4142 adev->pcie_wreg = &amdgpu_invalid_wreg;
4143 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4144 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4145 adev->pciep_rreg = &amdgpu_invalid_rreg;
4146 adev->pciep_wreg = &amdgpu_invalid_wreg;
4147 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4148 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4149 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4150 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4151 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4152 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4153 adev->didt_rreg = &amdgpu_invalid_rreg;
4154 adev->didt_wreg = &amdgpu_invalid_wreg;
4155 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4156 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4157 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4158 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4159
4160 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4161 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4162 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4163
4164 	/* mutex initialization is all done here so we
4165 	 * can recall the function without locking issues
4166 */
4167 mutex_init(&adev->firmware.mutex);
4168 mutex_init(&adev->pm.mutex);
4169 mutex_init(&adev->gfx.gpu_clock_mutex);
4170 mutex_init(&adev->srbm_mutex);
4171 mutex_init(&adev->gfx.pipe_reserve_mutex);
4172 mutex_init(&adev->gfx.gfx_off_mutex);
4173 mutex_init(&adev->gfx.partition_mutex);
4174 mutex_init(&adev->grbm_idx_mutex);
4175 mutex_init(&adev->mn_lock);
4176 mutex_init(&adev->virt.vf_errors.lock);
4177 mutex_init(&adev->virt.rlcg_reg_lock);
4178 hash_init(adev->mn_hash);
4179 mutex_init(&adev->psp.mutex);
4180 mutex_init(&adev->notifier_lock);
4181 mutex_init(&adev->pm.stable_pstate_ctx_lock);
4182 mutex_init(&adev->benchmark_mutex);
4183 mutex_init(&adev->gfx.reset_sem_mutex);
4184 /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4185 mutex_init(&adev->enforce_isolation_mutex);
4186 mutex_init(&adev->gfx.kfd_sch_mutex);
4187
4188 amdgpu_device_init_apu_flags(adev);
4189
4190 r = amdgpu_device_check_arguments(adev);
4191 if (r)
4192 return r;
4193
4194 spin_lock_init(&adev->mmio_idx_lock);
4195 spin_lock_init(&adev->smc_idx_lock);
4196 spin_lock_init(&adev->pcie_idx_lock);
4197 spin_lock_init(&adev->uvd_ctx_idx_lock);
4198 spin_lock_init(&adev->didt_idx_lock);
4199 spin_lock_init(&adev->gc_cac_idx_lock);
4200 spin_lock_init(&adev->se_cac_idx_lock);
4201 spin_lock_init(&adev->audio_endpt_idx_lock);
4202 spin_lock_init(&adev->mm_stats.lock);
4203 spin_lock_init(&adev->wb.lock);
4204
4205 INIT_LIST_HEAD(&adev->reset_list);
4206
4207 INIT_LIST_HEAD(&adev->ras_list);
4208
4209 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4210
4211 INIT_DELAYED_WORK(&adev->delayed_init_work,
4212 amdgpu_device_delayed_init_work_handler);
4213 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4214 amdgpu_device_delay_enable_gfx_off);
4215 /*
4216 * Initialize the enforce_isolation work structures for each XCP
4217 * partition. This work handler is responsible for enforcing shader
4218 * isolation on AMD GPUs. It counts the number of emitted fences for
4219 * each GFX and compute ring. If there are any fences, it schedules
4220 * the `enforce_isolation_work` to be run after a delay. If there are
4221 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4222 * runqueue.
4223 */
4224 for (i = 0; i < MAX_XCP; i++) {
4225 INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4226 amdgpu_gfx_enforce_isolation_handler);
4227 adev->gfx.enforce_isolation[i].adev = adev;
4228 adev->gfx.enforce_isolation[i].xcp_id = i;
4229 }
4230
4231 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4232
4233 adev->gfx.gfx_off_req_count = 1;
4234 adev->gfx.gfx_off_residency = 0;
4235 adev->gfx.gfx_off_entrycount = 0;
4236 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4237
4238 atomic_set(&adev->throttling_logging_enabled, 1);
4239 /*
4240 * If throttling continues, logging will be performed every minute
4241 * to avoid log flooding. "-1" is subtracted since the thermal
4242 * throttling interrupt comes every second. Thus, the total logging
4243 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4244 	 * for the throttling interrupt) = 60 seconds.
4245 */
4246 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4247 ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1);
4248
4249 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4250 ratelimit_set_flags(&adev->virt.ras_telemetry_rs, RATELIMIT_MSG_ON_RELEASE);
4251
4252 /* Registers mapping */
4253 /* TODO: block userspace mapping of io register */
4254 if (adev->asic_type >= CHIP_BONAIRE) {
4255 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4256 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4257 } else {
4258 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4259 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4260 }
4261
4262 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4263 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4264
4265 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4266 if (!adev->rmmio)
4267 return -ENOMEM;
4268
4269 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4270 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4271
4272 /*
4273 	 * The reset domain needs to be present early, before any XGMI hive is
4274 	 * discovered and initialized, so that the reset semaphore and in-GPU
4275 	 * reset flag can be used during early init and before the first RREG32.
4276 */
4277 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4278 if (!adev->reset_domain)
4279 return -ENOMEM;
4280
4281 /* detect hw virtualization here */
4282 amdgpu_detect_virtualization(adev);
4283
4284 amdgpu_device_get_pcie_info(adev);
4285
4286 r = amdgpu_device_get_job_timeout_settings(adev);
4287 if (r) {
4288 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4289 return r;
4290 }
4291
4292 amdgpu_device_set_mcbp(adev);
4293
4294 /*
4295 * By default, use default mode where all blocks are expected to be
4296 * initialized. At present a 'swinit' of blocks is required to be
4297 * completed before the need for a different level is detected.
4298 */
4299 amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
4300 /* early init functions */
4301 r = amdgpu_device_ip_early_init(adev);
4302 if (r)
4303 return r;
4304
4305 /* Get rid of things like offb */
4306 r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
4307 if (r)
4308 return r;
4309
4310 /* Enable TMZ based on IP_VERSION */
4311 amdgpu_gmc_tmz_set(adev);
4312
4313 if (amdgpu_sriov_vf(adev) &&
4314 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4315 /* VF MMIO access (except mailbox range) from CPU
4316 		 * will be blocked during SR-IOV runtime
4317 */
4318 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4319
4320 amdgpu_gmc_noretry_set(adev);
4321 /* Need to get xgmi info early to decide the reset behavior*/
4322 if (adev->gmc.xgmi.supported) {
4323 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4324 if (r)
4325 return r;
4326 }
4327
4328 /* enable PCIE atomic ops */
4329 if (amdgpu_sriov_vf(adev)) {
4330 if (adev->virt.fw_reserve.p_pf2vf)
4331 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4332 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4333 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4334 		/* APUs with gfx9 onwards don't rely on PCIe atomics; their
4335 		 * internal path natively supports atomics, so set have_atomics_support to true.
4336 */
4337 } else if ((adev->flags & AMD_IS_APU) &&
4338 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4339 IP_VERSION(9, 0, 0))) {
4340 adev->have_atomics_support = true;
4341 } else {
4342 adev->have_atomics_support =
4343 !pci_enable_atomic_ops_to_root(adev->pdev,
4344 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4345 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4346 }
4347
4348 if (!adev->have_atomics_support)
4349 		dev_info(adev->dev, "PCIe atomic ops are not supported\n");
4350
4351 /* doorbell bar mapping and doorbell index init*/
4352 amdgpu_doorbell_init(adev);
4353
4354 if (amdgpu_emu_mode == 1) {
4355 /* post the asic on emulation mode */
4356 emu_soc_asic_init(adev);
4357 goto fence_driver_init;
4358 }
4359
4360 amdgpu_reset_init(adev);
4361
4362 /* detect if we are with an SRIOV vbios */
4363 if (adev->bios)
4364 amdgpu_device_detect_sriov_bios(adev);
4365
4366 /* check if we need to reset the asic
4367 * E.g., driver was not cleanly unloaded previously, etc.
4368 */
4369 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4370 if (adev->gmc.xgmi.num_physical_nodes) {
4371 dev_info(adev->dev, "Pending hive reset.\n");
4372 amdgpu_set_init_level(adev,
4373 AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4374 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4375 !amdgpu_device_has_display_hardware(adev)) {
4376 r = psp_gpu_reset(adev);
4377 } else {
4378 tmp = amdgpu_reset_method;
4379 /* It should do a default reset when loading or reloading the driver,
4380 * regardless of the module parameter reset_method.
4381 */
4382 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4383 r = amdgpu_asic_reset(adev);
4384 amdgpu_reset_method = tmp;
4385 }
4386
4387 if (r) {
4388 dev_err(adev->dev, "asic reset on init failed\n");
4389 goto failed;
4390 }
4391 }
4392
4393 /* Post card if necessary */
4394 if (amdgpu_device_need_post(adev)) {
4395 if (!adev->bios) {
4396 dev_err(adev->dev, "no vBIOS found\n");
4397 r = -EINVAL;
4398 goto failed;
4399 }
4400 DRM_INFO("GPU posting now...\n");
4401 r = amdgpu_device_asic_init(adev);
4402 if (r) {
4403 dev_err(adev->dev, "gpu post error!\n");
4404 goto failed;
4405 }
4406 }
4407
4408 if (adev->bios) {
4409 if (adev->is_atom_fw) {
4410 /* Initialize clocks */
4411 r = amdgpu_atomfirmware_get_clock_info(adev);
4412 if (r) {
4413 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4414 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4415 goto failed;
4416 }
4417 } else {
4418 /* Initialize clocks */
4419 r = amdgpu_atombios_get_clock_info(adev);
4420 if (r) {
4421 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4422 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4423 goto failed;
4424 }
4425 /* init i2c buses */
4426 if (!amdgpu_device_has_dc_support(adev))
4427 amdgpu_atombios_i2c_init(adev);
4428 }
4429 }
4430
4431fence_driver_init:
4432 /* Fence driver */
4433 r = amdgpu_fence_driver_sw_init(adev);
4434 if (r) {
4435 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4436 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4437 goto failed;
4438 }
4439
4440 /* init the mode config */
4441 drm_mode_config_init(adev_to_drm(adev));
4442
4443 r = amdgpu_device_ip_init(adev);
4444 if (r) {
4445 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4446 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4447 goto release_ras_con;
4448 }
4449
4450 amdgpu_fence_driver_hw_init(adev);
4451
4452 dev_info(adev->dev,
4453 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4454 adev->gfx.config.max_shader_engines,
4455 adev->gfx.config.max_sh_per_se,
4456 adev->gfx.config.max_cu_per_sh,
4457 adev->gfx.cu_info.number);
4458
4459 adev->accel_working = true;
4460
4461 amdgpu_vm_check_compute_bug(adev);
4462
4463 /* Initialize the buffer migration limit. */
4464 if (amdgpu_moverate >= 0)
4465 max_MBps = amdgpu_moverate;
4466 else
4467 max_MBps = 8; /* Allow 8 MB/s. */
4468 /* Get a log2 for easy divisions. */
4469 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4470
4471 /*
4472 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4473 	 * Otherwise the mgpu fan boost feature will be skipped because the
4474 	 * gpu instance count would be too low.
4475 */
4476 amdgpu_register_gpu_instance(adev);
4477
4478 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4479 * explicit gating rather than handling it automatically.
4480 */
4481 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4482 r = amdgpu_device_ip_late_init(adev);
4483 if (r) {
4484 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4485 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4486 goto release_ras_con;
4487 }
4488 /* must succeed. */
4489 amdgpu_ras_resume(adev);
4490 queue_delayed_work(system_wq, &adev->delayed_init_work,
4491 msecs_to_jiffies(AMDGPU_RESUME_MS));
4492 }
4493
4494 if (amdgpu_sriov_vf(adev)) {
4495 amdgpu_virt_release_full_gpu(adev, true);
4496 flush_delayed_work(&adev->delayed_init_work);
4497 }
4498
4499 /*
4500 	 * Register these sysfs interfaces after `late_init`, since some of the
4501 	 * operations performed in `late_init` might affect how the sysfs
4502 	 * interfaces are created.
4503 */
4504 r = amdgpu_atombios_sysfs_init(adev);
4505 if (r)
4506 drm_err(&adev->ddev,
4507 "registering atombios sysfs failed (%d).\n", r);
4508
4509 r = amdgpu_pm_sysfs_init(adev);
4510 if (r)
4511 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4512
4513 r = amdgpu_ucode_sysfs_init(adev);
4514 if (r) {
4515 adev->ucode_sysfs_en = false;
4516 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4517 } else
4518 adev->ucode_sysfs_en = true;
4519
4520 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4521 if (r)
4522 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4523
4524 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4525 if (r)
4526 dev_err(adev->dev,
4527 "Could not create amdgpu board attributes\n");
4528
4529 amdgpu_fru_sysfs_init(adev);
4530 amdgpu_reg_state_sysfs_init(adev);
4531 amdgpu_xcp_cfg_sysfs_init(adev);
4532
4533 	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4534 		r = amdgpu_pmu_init(adev);
4535 		if (r)
4536 			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
4537
4538 	/* Keep the stored PCI config space at hand to restore after a sudden PCI error */
4539 if (amdgpu_device_cache_pci_state(adev->pdev))
4540 pci_restore_state(pdev);
4541
4542 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4543 /* this will fail for cards that aren't VGA class devices, just
4544 * ignore it
4545 */
4546 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4547 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4548
4549 px = amdgpu_device_supports_px(ddev);
4550
4551 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4552 apple_gmux_detect(NULL, NULL)))
4553 vga_switcheroo_register_client(adev->pdev,
4554 &amdgpu_switcheroo_ops, px);
4555
4556 if (px)
4557 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4558
4559 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4560 amdgpu_xgmi_reset_on_init(adev);
4561
4562 amdgpu_device_check_iommu_direct_map(adev);
4563
4564 return 0;
4565
4566release_ras_con:
4567 if (amdgpu_sriov_vf(adev))
4568 amdgpu_virt_release_full_gpu(adev, true);
4569
4570 /* failed in exclusive mode due to timeout */
4571 if (amdgpu_sriov_vf(adev) &&
4572 !amdgpu_sriov_runtime(adev) &&
4573 amdgpu_virt_mmio_blocked(adev) &&
4574 !amdgpu_virt_wait_reset(adev)) {
4575 dev_err(adev->dev, "VF exclusive mode timeout\n");
4576 /* Don't send request since VF is inactive. */
4577 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4578 adev->virt.ops = NULL;
4579 r = -EAGAIN;
4580 }
4581 amdgpu_release_ras_context(adev);
4582
4583failed:
4584 amdgpu_vf_error_trans_all(adev);
4585
4586 return r;
4587}
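
/*
 * A minimal, hypothetical sketch (not the driver's actual probe code) of
 * how a probe path might drive the init/teardown entry points above and
 * below, including the -EAGAIN case the SR-IOV exclusive-mode timeout
 * handling can return:
 *
 *	r = amdgpu_device_init(adev, flags);
 *	if (r == -EAGAIN)
 *		...retry init once the VF regains exclusive access...
 *	else if (r)
 *		goto err_free;
 *	...
 *	amdgpu_device_fini_hw(adev);
 *	amdgpu_device_fini_sw(adev);
 */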
4588
4589static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4590{
4591
4592 /* Clear all CPU mappings pointing to this device */
4593 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4594
4595 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4596 amdgpu_doorbell_fini(adev);
4597
4598 iounmap(adev->rmmio);
4599 adev->rmmio = NULL;
4600 if (adev->mman.aper_base_kaddr)
4601 iounmap(adev->mman.aper_base_kaddr);
4602 adev->mman.aper_base_kaddr = NULL;
4603
4604 /* Memory manager related */
4605 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4606 arch_phys_wc_del(adev->gmc.vram_mtrr);
4607 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4608 }
4609}
4610
4611/**
4612 * amdgpu_device_fini_hw - tear down the driver
4613 *
4614 * @adev: amdgpu_device pointer
4615 *
4616 * Tear down the driver info (all asics).
4617 * Called at driver shutdown.
4618 */
4619void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4620{
4621 dev_info(adev->dev, "amdgpu: finishing device.\n");
4622 flush_delayed_work(&adev->delayed_init_work);
4623
4624 if (adev->mman.initialized)
4625 drain_workqueue(adev->mman.bdev.wq);
4626 adev->shutdown = true;
4627
4628 /* make sure IB test finished before entering exclusive mode
4629 * to avoid preemption on IB test
4630 */
4631 if (amdgpu_sriov_vf(adev)) {
4632 amdgpu_virt_request_full_gpu(adev, false);
4633 amdgpu_virt_fini_data_exchange(adev);
4634 }
4635
4636 /* disable all interrupts */
4637 amdgpu_irq_disable_all(adev);
4638 if (adev->mode_info.mode_config_initialized) {
4639 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4640 drm_helper_force_disable_all(adev_to_drm(adev));
4641 else
4642 drm_atomic_helper_shutdown(adev_to_drm(adev));
4643 }
4644 amdgpu_fence_driver_hw_fini(adev);
4645
4646 if (adev->pm.sysfs_initialized)
4647 amdgpu_pm_sysfs_fini(adev);
4648 if (adev->ucode_sysfs_en)
4649 amdgpu_ucode_sysfs_fini(adev);
4650 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4651 amdgpu_fru_sysfs_fini(adev);
4652
4653 amdgpu_reg_state_sysfs_fini(adev);
4654 amdgpu_xcp_cfg_sysfs_fini(adev);
4655
4656 /* disable ras feature must before hw fini */
4657 amdgpu_ras_pre_fini(adev);
4658
4659 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4660
4661 amdgpu_device_ip_fini_early(adev);
4662
4663 amdgpu_irq_fini_hw(adev);
4664
4665 if (adev->mman.initialized)
4666 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4667
4668 amdgpu_gart_dummy_page_fini(adev);
4669
4670 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4671 amdgpu_device_unmap_mmio(adev);
4672
4673}
4674
4675void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4676{
4677 int idx;
4678 bool px;
4679
4680 amdgpu_device_ip_fini(adev);
4681 amdgpu_fence_driver_sw_fini(adev);
4682 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4683 adev->accel_working = false;
4684 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4685
4686 amdgpu_reset_fini(adev);
4687
4688 /* free i2c buses */
4689 if (!amdgpu_device_has_dc_support(adev))
4690 amdgpu_i2c_fini(adev);
4691
4692 if (amdgpu_emu_mode != 1)
4693 amdgpu_atombios_fini(adev);
4694
4695 kfree(adev->bios);
4696 adev->bios = NULL;
4697
4698 kfree(adev->fru_info);
4699 adev->fru_info = NULL;
4700
4701 px = amdgpu_device_supports_px(adev_to_drm(adev));
4702
4703 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4704 apple_gmux_detect(NULL, NULL)))
4705 vga_switcheroo_unregister_client(adev->pdev);
4706
4707 if (px)
4708 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4709
4710 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4711 vga_client_unregister(adev->pdev);
4712
4713 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4714
4715 iounmap(adev->rmmio);
4716 adev->rmmio = NULL;
4717 amdgpu_doorbell_fini(adev);
4718 drm_dev_exit(idx);
4719 }
4720
4721 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4722 amdgpu_pmu_fini(adev);
4723 if (adev->mman.discovery_bin)
4724 amdgpu_discovery_fini(adev);
4725
4726 amdgpu_reset_put_reset_domain(adev->reset_domain);
4727 adev->reset_domain = NULL;
4728
4729 kfree(adev->pci_state);
4730
4731}
4732
4733/**
4734 * amdgpu_device_evict_resources - evict device resources
4735 * @adev: amdgpu device object
4736 *
4737  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4738 * of the vram memory type. Mainly used for evicting device resources
4739 * at suspend time.
4740 *
4741 */
4742static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4743{
4744 int ret;
4745
4746 /* No need to evict vram on APUs for suspend to ram or s2idle */
4747 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4748 return 0;
4749
4750 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4751 if (ret)
4752 DRM_WARN("evicting device resources failed\n");
4753 return ret;
4754}
4755
4756/*
4757 * Suspend & resume.
4758 */
4759/**
4760 * amdgpu_device_prepare - prepare for device suspend
4761 *
4762 * @dev: drm dev pointer
4763 *
4764 * Prepare to put the hw in the suspend state (all asics).
4765 * Returns 0 for success or an error on failure.
4766 * Called at driver suspend.
4767 */
4768int amdgpu_device_prepare(struct drm_device *dev)
4769{
4770 struct amdgpu_device *adev = drm_to_adev(dev);
4771 int i, r;
4772
4773 amdgpu_choose_low_power_state(adev);
4774
4775 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4776 return 0;
4777
4778 /* Evict the majority of BOs before starting suspend sequence */
4779 r = amdgpu_device_evict_resources(adev);
4780 if (r)
4781 goto unprepare;
4782
4783 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4784
4785 for (i = 0; i < adev->num_ip_blocks; i++) {
4786 if (!adev->ip_blocks[i].status.valid)
4787 continue;
4788 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4789 continue;
4790 r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4791 if (r)
4792 goto unprepare;
4793 }
4794
4795 return 0;
4796
4797unprepare:
4798 adev->in_s0ix = adev->in_s3 = false;
4799
4800 return r;
4801}
4802
4803/**
4804 * amdgpu_device_suspend - initiate device suspend
4805 *
4806 * @dev: drm dev pointer
4807 * @notify_clients: notify in-kernel DRM clients
4808 *
4809 * Puts the hw in the suspend state (all asics).
4810 * Returns 0 for success or an error on failure.
4811 * Called at driver suspend.
4812 */
4813int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4814{
4815 struct amdgpu_device *adev = drm_to_adev(dev);
4816 int r = 0;
4817
4818 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4819 return 0;
4820
4821 adev->in_suspend = true;
4822
4823 if (amdgpu_sriov_vf(adev)) {
4824 amdgpu_virt_fini_data_exchange(adev);
4825 r = amdgpu_virt_request_full_gpu(adev, false);
4826 if (r)
4827 return r;
4828 }
4829
4830 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4831 DRM_WARN("smart shift update failed\n");
4832
4833 if (notify_clients)
4834 drm_client_dev_suspend(adev_to_drm(adev), false);
4835
4836 cancel_delayed_work_sync(&adev->delayed_init_work);
4837
4838 amdgpu_ras_suspend(adev);
4839
4840 amdgpu_device_ip_suspend_phase1(adev);
4841
4842 if (!adev->in_s0ix)
4843 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4844
4845 r = amdgpu_device_evict_resources(adev);
4846 if (r)
4847 return r;
4848
4849 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4850
4851 amdgpu_fence_driver_hw_fini(adev);
4852
4853 amdgpu_device_ip_suspend_phase2(adev);
4854
4855 if (amdgpu_sriov_vf(adev))
4856 amdgpu_virt_release_full_gpu(adev, false);
4857
4858 r = amdgpu_dpm_notify_rlc_state(adev, false);
4859 if (r)
4860 return r;
4861
4862 return 0;
4863}
4864
4865/**
4866 * amdgpu_device_resume - initiate device resume
4867 *
4868 * @dev: drm dev pointer
4869 * @notify_clients: notify in-kernel DRM clients
4870 *
4871 * Bring the hw back to operating state (all asics).
4872 * Returns 0 for success or an error on failure.
4873 * Called at driver resume.
4874 */
4875int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4876{
4877 struct amdgpu_device *adev = drm_to_adev(dev);
4878 int r = 0;
4879
4880 if (amdgpu_sriov_vf(adev)) {
4881 r = amdgpu_virt_request_full_gpu(adev, true);
4882 if (r)
4883 return r;
4884 }
4885
4886 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4887 return 0;
4888
4889 if (adev->in_s0ix)
4890 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4891
4892 /* post card */
4893 if (amdgpu_device_need_post(adev)) {
4894 r = amdgpu_device_asic_init(adev);
4895 if (r)
4896 dev_err(adev->dev, "amdgpu asic init failed\n");
4897 }
4898
4899 r = amdgpu_device_ip_resume(adev);
4900
4901 if (r) {
4902 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4903 goto exit;
4904 }
4905 amdgpu_fence_driver_hw_init(adev);
4906
4907 if (!adev->in_s0ix) {
4908 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4909 if (r)
4910 goto exit;
4911 }
4912
4913 r = amdgpu_device_ip_late_init(adev);
4914 if (r)
4915 goto exit;
4916
4917 queue_delayed_work(system_wq, &adev->delayed_init_work,
4918 msecs_to_jiffies(AMDGPU_RESUME_MS));
4919exit:
4920 if (amdgpu_sriov_vf(adev)) {
4921 amdgpu_virt_init_data_exchange(adev);
4922 amdgpu_virt_release_full_gpu(adev, true);
4923 }
4924
4925 if (r)
4926 return r;
4927
4928 /* Make sure IB tests flushed */
4929 flush_delayed_work(&adev->delayed_init_work);
4930
4931 if (notify_clients)
4932 drm_client_dev_resume(adev_to_drm(adev), false);
4933
4934 amdgpu_ras_resume(adev);
4935
4936 if (adev->mode_info.num_crtc) {
4937 /*
4938 * Most of the connector probing functions try to acquire runtime pm
4939 * refs to ensure that the GPU is powered on when connector polling is
4940 * performed. Since we're calling this from a runtime PM callback,
4941 * trying to acquire rpm refs will cause us to deadlock.
4942 *
4943 * Since we're guaranteed to be holding the rpm lock, it's safe to
4944 * temporarily disable the rpm helpers so this doesn't deadlock us.
4945 */
4946#ifdef CONFIG_PM
4947 dev->dev->power.disable_depth++;
4948#endif
4949 if (!adev->dc_enabled)
4950 drm_helper_hpd_irq_event(dev);
4951 else
4952 drm_kms_helper_hotplug_event(dev);
4953#ifdef CONFIG_PM
4954 dev->dev->power.disable_depth--;
4955#endif
4956 }
4957 adev->in_suspend = false;
4958
4959 if (adev->enable_mes)
4960 amdgpu_mes_self_test(adev);
4961
4962 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4963 DRM_WARN("smart shift update failed\n");
4964
4965 return 0;
4966}
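
/*
 * A minimal, hypothetical sketch (not the driver's actual PM glue, which
 * lives in the PCI driver code) of how the suspend/resume entry points
 * above are typically wired into dev_pm_ops, assuming the drm_device is
 * stored as the device's drvdata:
 *
 *	static int my_pm_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 *	static int my_pm_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */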
4967
4968/**
4969 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4970 *
4971 * @adev: amdgpu_device pointer
4972 *
4973 * The list of all the hardware IPs that make up the asic is walked and
4974 * the check_soft_reset callbacks are run. check_soft_reset determines
4975 * if the asic is still hung or not.
4976 * Returns true if any of the IPs are still in a hung state, false if not.
4977 */
4978static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4979{
4980 int i;
4981 bool asic_hang = false;
4982
4983 if (amdgpu_sriov_vf(adev))
4984 return true;
4985
4986 if (amdgpu_asic_need_full_reset(adev))
4987 return true;
4988
4989 for (i = 0; i < adev->num_ip_blocks; i++) {
4990 if (!adev->ip_blocks[i].status.valid)
4991 continue;
4992 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4993 adev->ip_blocks[i].status.hang =
4994 adev->ip_blocks[i].version->funcs->check_soft_reset(
4995 &adev->ip_blocks[i]);
4996 if (adev->ip_blocks[i].status.hang) {
4997 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4998 asic_hang = true;
4999 }
5000 }
5001 return asic_hang;
5002}
5003
5004/**
5005 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5006 *
5007 * @adev: amdgpu_device pointer
5008 *
5009 * The list of all the hardware IPs that make up the asic is walked and the
5010 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
5011 * handles any IP specific hardware or software state changes that are
5012 * necessary for a soft reset to succeed.
5013 * Returns 0 on success, negative error code on failure.
5014 */
5015static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5016{
5017 int i, r = 0;
5018
5019 for (i = 0; i < adev->num_ip_blocks; i++) {
5020 if (!adev->ip_blocks[i].status.valid)
5021 continue;
5022 if (adev->ip_blocks[i].status.hang &&
5023 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5024 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5025 if (r)
5026 return r;
5027 }
5028 }
5029
5030 return 0;
5031}
5032
5033/**
5034 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5035 *
5036 * @adev: amdgpu_device pointer
5037 *
5038 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
5039 * reset is necessary to recover.
5040 * Returns true if a full asic reset is required, false if not.
5041 */
5042static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5043{
5044 int i;
5045
5046 if (amdgpu_asic_need_full_reset(adev))
5047 return true;
5048
5049 for (i = 0; i < adev->num_ip_blocks; i++) {
5050 if (!adev->ip_blocks[i].status.valid)
5051 continue;
5052 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5053 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5054 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5055 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5056 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5057 if (adev->ip_blocks[i].status.hang) {
5058 				dev_info(adev->dev, "Some blocks need a full reset!\n");
5059 return true;
5060 }
5061 }
5062 }
5063 return false;
5064}
5065
5066/**
5067 * amdgpu_device_ip_soft_reset - do a soft reset
5068 *
5069 * @adev: amdgpu_device pointer
5070 *
5071 * The list of all the hardware IPs that make up the asic is walked and the
5072 * soft_reset callbacks are run if the block is hung. soft_reset handles any
5073 * IP specific hardware or software state changes that are necessary to soft
5074 * reset the IP.
5075 * Returns 0 on success, negative error code on failure.
5076 */
5077static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5078{
5079 int i, r = 0;
5080
5081 for (i = 0; i < adev->num_ip_blocks; i++) {
5082 if (!adev->ip_blocks[i].status.valid)
5083 continue;
5084 if (adev->ip_blocks[i].status.hang &&
5085 adev->ip_blocks[i].version->funcs->soft_reset) {
5086 r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5087 if (r)
5088 return r;
5089 }
5090 }
5091
5092 return 0;
5093}
5094
5095/**
5096 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5097 *
5098 * @adev: amdgpu_device pointer
5099 *
5100 * The list of all the hardware IPs that make up the asic is walked and the
5101 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5102 * handles any IP specific hardware or software state changes that are
5103 * necessary after the IP has been soft reset.
5104 * Returns 0 on success, negative error code on failure.
5105 */
5106static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5107{
5108 int i, r = 0;
5109
5110 for (i = 0; i < adev->num_ip_blocks; i++) {
5111 if (!adev->ip_blocks[i].status.valid)
5112 continue;
5113 if (adev->ip_blocks[i].status.hang &&
5114 adev->ip_blocks[i].version->funcs->post_soft_reset)
5115 r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5116 if (r)
5117 return r;
5118 }
5119
5120 return 0;
5121}
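
/*
 * The four soft-reset helpers above are used as a sequence; a condensed
 * sketch mirroring their use in amdgpu_device_pre_asic_reset() below:
 *
 *	if (amdgpu_device_ip_check_soft_reset(adev)) {
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *		if (r || amdgpu_device_ip_check_soft_reset(adev))
 *			...still hung, fall back to a full ASIC reset...
 *	}
 */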
5122
5123/**
5124 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5125 *
5126 * @adev: amdgpu_device pointer
5127 * @reset_context: amdgpu reset context pointer
5128 *
5129  * Do a VF FLR and reinitialize the ASIC.
5130  * Returns 0 on success, negative error code on failure.
5131 */
5132static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5133 struct amdgpu_reset_context *reset_context)
5134{
5135 int r;
5136 struct amdgpu_hive_info *hive = NULL;
5137
5138 if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5139 if (!amdgpu_ras_get_fed_status(adev))
5140 amdgpu_virt_ready_to_reset(adev);
5141 amdgpu_virt_wait_reset(adev);
5142 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5143 r = amdgpu_virt_request_full_gpu(adev, true);
5144 } else {
5145 r = amdgpu_virt_reset_gpu(adev);
5146 }
5147 if (r)
5148 return r;
5149
5150 amdgpu_ras_set_fed(adev, false);
5151 amdgpu_irq_gpu_reset_resume_helper(adev);
5152
5153 	/* some SW cleanup the VF needs to do before recovery */
5154 amdgpu_virt_post_reset(adev);
5155
5156 /* Resume IP prior to SMC */
5157 r = amdgpu_device_ip_reinit_early_sriov(adev);
5158 if (r)
5159 return r;
5160
5161 amdgpu_virt_init_data_exchange(adev);
5162
5163 r = amdgpu_device_fw_loading(adev);
5164 if (r)
5165 return r;
5166
5167 /* now we are okay to resume SMC/CP/SDMA */
5168 r = amdgpu_device_ip_reinit_late_sriov(adev);
5169 if (r)
5170 return r;
5171
5172 hive = amdgpu_get_xgmi_hive(adev);
5173 /* Update PSP FW topology after reset */
5174 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5175 r = amdgpu_xgmi_update_topology(hive, adev);
5176 if (hive)
5177 amdgpu_put_xgmi_hive(hive);
5178 if (r)
5179 return r;
5180
5181 r = amdgpu_ib_ring_tests(adev);
5182 if (r)
5183 return r;
5184
5185 if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5186 amdgpu_inc_vram_lost(adev);
5187
5188 	/* This needs to be called while we still have full GPU access, so we
5189 	 * can't defer it the way bare-metal does.
5190 */
5191 amdgpu_amdkfd_post_reset(adev);
5192 amdgpu_virt_release_full_gpu(adev, true);
5193
5194 	/* Aldebaran and gfx 9.4.3/9.4.4/11.0.3 support RAS in SR-IOV, so resume RAS during reset */
5195 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5196 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5197 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5198 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5199 amdgpu_ras_resume(adev);
5200
5201 amdgpu_virt_ras_telemetry_post_reset(adev);
5202
5203 return 0;
5204}
5205
5206/**
5207  * amdgpu_device_has_job_running - check if there is any job in the pending list
5208 *
5209 * @adev: amdgpu_device pointer
5210 *
5211  * Check if there is any job in the scheduler pending list.
5212 */
5213bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5214{
5215 int i;
5216 struct drm_sched_job *job;
5217
5218 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5219 struct amdgpu_ring *ring = adev->rings[i];
5220
5221 if (!amdgpu_ring_sched_ready(ring))
5222 continue;
5223
5224 spin_lock(&ring->sched.job_list_lock);
5225 job = list_first_entry_or_null(&ring->sched.pending_list,
5226 struct drm_sched_job, list);
5227 spin_unlock(&ring->sched.job_list_lock);
5228 if (job)
5229 return true;
5230 }
5231 return false;
5232}
5233
5234/**
5235 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5236 *
5237 * @adev: amdgpu_device pointer
5238 *
5239 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5240 * a hung GPU.
5241 */
5242bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5243{
5244
5245 if (amdgpu_gpu_recovery == 0)
5246 goto disabled;
5247
5248 /* Skip soft reset check in fatal error mode */
5249 if (!amdgpu_ras_is_poison_mode_supported(adev))
5250 return true;
5251
5252 if (amdgpu_sriov_vf(adev))
5253 return true;
5254
5255 if (amdgpu_gpu_recovery == -1) {
5256 switch (adev->asic_type) {
5257#ifdef CONFIG_DRM_AMDGPU_SI
5258 case CHIP_VERDE:
5259 case CHIP_TAHITI:
5260 case CHIP_PITCAIRN:
5261 case CHIP_OLAND:
5262 case CHIP_HAINAN:
5263#endif
5264#ifdef CONFIG_DRM_AMDGPU_CIK
5265 case CHIP_KAVERI:
5266 case CHIP_KABINI:
5267 case CHIP_MULLINS:
5268#endif
5269 case CHIP_CARRIZO:
5270 case CHIP_STONEY:
5271 case CHIP_CYAN_SKILLFISH:
5272 goto disabled;
5273 default:
5274 break;
5275 }
5276 }
5277
5278 return true;
5279
5280disabled:
5281 dev_info(adev->dev, "GPU recovery disabled.\n");
5282 return false;
5283}
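
/*
 * Summary (illustrative) of the amdgpu_gpu_recovery module parameter as
 * interpreted above:
 *
 *	amdgpu.gpu_recovery=0:	recovery disabled
 *	amdgpu.gpu_recovery=-1:	auto - enabled, except on the legacy ASICs
 *				listed in the switch statement
 *	any other value:	recovery enabled
 *
 * Unless recovery is explicitly disabled, SR-IOV VFs and fatal
 * (non-poison-mode) RAS errors always attempt recovery, bypassing the
 * per-ASIC auto check.
 */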
5284
5285int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5286{
5287 u32 i;
5288 int ret = 0;
5289
5290 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5291
5292 dev_info(adev->dev, "GPU mode1 reset\n");
5293
5294 /* Cache the state before bus master disable. The saved config space
5295 * values are used in other cases like restore after mode-2 reset.
5296 */
5297 amdgpu_device_cache_pci_state(adev->pdev);
5298
5299 /* disable BM */
5300 pci_clear_master(adev->pdev);
5301
5302 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5303 dev_info(adev->dev, "GPU smu mode1 reset\n");
5304 ret = amdgpu_dpm_mode1_reset(adev);
5305 } else {
5306 dev_info(adev->dev, "GPU psp mode1 reset\n");
5307 ret = psp_gpu_reset(adev);
5308 }
5309
5310 if (ret)
5311 goto mode1_reset_failed;
5312
5313 amdgpu_device_load_pci_state(adev->pdev);
5314 ret = amdgpu_psp_wait_for_bootloader(adev);
5315 if (ret)
5316 goto mode1_reset_failed;
5317
5318 /* wait for asic to come out of reset */
5319 for (i = 0; i < adev->usec_timeout; i++) {
5320 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5321
5322 if (memsize != 0xffffffff)
5323 break;
5324 udelay(1);
5325 }
5326
5327 if (i >= adev->usec_timeout) {
5328 ret = -ETIMEDOUT;
5329 goto mode1_reset_failed;
5330 }
5331
5332 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5333
5334 return 0;
5335
5336mode1_reset_failed:
5337 dev_err(adev->dev, "GPU mode1 reset failed\n");
5338 return ret;
5339}
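
/*
 * Condensed view of the mode1 reset sequence implemented above
 * (illustrative only):
 *
 *	amdgpu_device_cache_pci_state(adev->pdev);
 *	pci_clear_master(adev->pdev);		// disable bus mastering
 *	amdgpu_dpm_mode1_reset(adev) if the SMU supports it,
 *	otherwise psp_gpu_reset(adev);
 *	amdgpu_device_load_pci_state(adev->pdev);
 *	amdgpu_psp_wait_for_bootloader(adev);
 *	poll the NBIO memsize register until it reads something other
 *	than 0xffffffff (ASIC out of reset) or usec_timeout expires
 */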
5340
5341int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5342 struct amdgpu_reset_context *reset_context)
5343{
5344 int i, r = 0;
5345 struct amdgpu_job *job = NULL;
5346 struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5347 bool need_full_reset =
5348 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5349
5350 if (reset_context->reset_req_dev == adev)
5351 job = reset_context->job;
5352
5353 if (amdgpu_sriov_vf(adev))
5354 amdgpu_virt_pre_reset(adev);
5355
5356 amdgpu_fence_driver_isr_toggle(adev, true);
5357
5358 /* block all schedulers and reset given job's ring */
5359 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5360 struct amdgpu_ring *ring = adev->rings[i];
5361
5362 if (!amdgpu_ring_sched_ready(ring))
5363 continue;
5364
5365 		/* Clear job fences from the fence driver to avoid force_completion
5366 		 * on them; only NULL and vm flush fences are left in the fence driver
5367 */
5368 amdgpu_fence_driver_clear_job_fences(ring);
5369
5370 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5371 amdgpu_fence_driver_force_completion(ring);
5372 }
5373
5374 amdgpu_fence_driver_isr_toggle(adev, false);
5375
5376 if (job && job->vm)
5377 drm_sched_increase_karma(&job->base);
5378
5379 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5380 /* If reset handler not implemented, continue; otherwise return */
5381 if (r == -EOPNOTSUPP)
5382 r = 0;
5383 else
5384 return r;
5385
5386 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5387 if (!amdgpu_sriov_vf(adev)) {
5388
5389 if (!need_full_reset)
5390 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5391
5392 if (!need_full_reset && amdgpu_gpu_recovery &&
5393 amdgpu_device_ip_check_soft_reset(adev)) {
5394 amdgpu_device_ip_pre_soft_reset(adev);
5395 r = amdgpu_device_ip_soft_reset(adev);
5396 amdgpu_device_ip_post_soft_reset(adev);
5397 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5398 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5399 need_full_reset = true;
5400 }
5401 }
5402
5403 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5404 dev_info(tmp_adev->dev, "Dumping IP State\n");
5405 /* Trigger ip dump before we reset the asic */
5406 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5407 if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5408 tmp_adev->ip_blocks[i].version->funcs
5409 ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5410 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5411 }
5412
5413 if (need_full_reset)
5414 r = amdgpu_device_ip_suspend(adev);
5415 if (need_full_reset)
5416 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5417 else
5418 clear_bit(AMDGPU_NEED_FULL_RESET,
5419 &reset_context->flags);
5420 }
5421
5422 return r;
5423}
5424
5425int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5426{
5427 struct list_head *device_list_handle;
5428 bool full_reset, vram_lost = false;
5429 struct amdgpu_device *tmp_adev;
5430 int r, init_level;
5431
5432 device_list_handle = reset_context->reset_device_list;
5433
5434 if (!device_list_handle)
5435 return -EINVAL;
5436
5437 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5438
5439 	/*
5440 	 * If it's a reset on init, use the default init level; otherwise keep
5441 	 * the level as the recovery level.
5442 */
5443 if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5444 init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5445 else
5446 init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5447
5448 r = 0;
5449 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5450 amdgpu_set_init_level(tmp_adev, init_level);
5451 if (full_reset) {
5452 /* post card */
5453 amdgpu_ras_set_fed(tmp_adev, false);
5454 r = amdgpu_device_asic_init(tmp_adev);
5455 if (r) {
5456 dev_warn(tmp_adev->dev, "asic atom init failed!");
5457 } else {
5458 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5459
5460 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5461 if (r)
5462 goto out;
5463
5464 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5465
5466 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5467 amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5468
5469 if (vram_lost) {
5470 DRM_INFO("VRAM is lost due to GPU reset!\n");
5471 amdgpu_inc_vram_lost(tmp_adev);
5472 }
5473
5474 r = amdgpu_device_fw_loading(tmp_adev);
5475 if (r)
5476 return r;
5477
5478 r = amdgpu_xcp_restore_partition_mode(
5479 tmp_adev->xcp_mgr);
5480 if (r)
5481 goto out;
5482
5483 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5484 if (r)
5485 goto out;
5486
5487 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5488 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5489
5490 if (vram_lost)
5491 amdgpu_device_fill_reset_magic(tmp_adev);
5492
5493 /*
5494 				 * Add this ASIC as tracked, as the reset has
5495 				 * already completed successfully.
5496 */
5497 amdgpu_register_gpu_instance(tmp_adev);
5498
5499 if (!reset_context->hive &&
5500 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5501 amdgpu_xgmi_add_device(tmp_adev);
5502
5503 r = amdgpu_device_ip_late_init(tmp_adev);
5504 if (r)
5505 goto out;
5506
5507 drm_client_dev_resume(adev_to_drm(tmp_adev), false);
5508
5509 /*
5510 				 * The GPU enters a bad state once the number of
5511 				 * faulty pages detected by ECC reaches the
5512 				 * threshold, and RAS recovery is scheduled next.
5513 				 * So add a check here to break recovery if it
5514 				 * indeed exceeds the bad page threshold, and
5515 				 * remind the user to retire this GPU or set a
5516 				 * bigger bad_page_threshold value to fix this
5517 				 * when probing the driver again.
5518 */
5519 if (!amdgpu_ras_is_rma(tmp_adev)) {
5520 /* must succeed. */
5521 amdgpu_ras_resume(tmp_adev);
5522 } else {
5523 r = -EINVAL;
5524 goto out;
5525 }
5526
5527 /* Update PSP FW topology after reset */
5528 if (reset_context->hive &&
5529 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5530 r = amdgpu_xgmi_update_topology(
5531 reset_context->hive, tmp_adev);
5532 }
5533 }
5534
5535out:
5536 if (!r) {
5537 /* IP init is complete now, set level as default */
5538 amdgpu_set_init_level(tmp_adev,
5539 AMDGPU_INIT_LEVEL_DEFAULT);
5540 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5541 r = amdgpu_ib_ring_tests(tmp_adev);
5542 if (r) {
5543 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5544 r = -EAGAIN;
5545 goto end;
5546 }
5547 }
5548
5549 if (r)
5550 tmp_adev->asic_reset_res = r;
5551 }
5552
5553end:
5554 return r;
5555}
5556
5557int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5558 struct amdgpu_reset_context *reset_context)
5559{
5560 struct amdgpu_device *tmp_adev = NULL;
5561 bool need_full_reset, skip_hw_reset;
5562 int r = 0;
5563
5564 /* Try reset handler method first */
5565 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5566 reset_list);
5567
5568 reset_context->reset_device_list = device_list_handle;
5569 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5570 /* If reset handler not implemented, continue; otherwise return */
5571 if (r == -EOPNOTSUPP)
5572 r = 0;
5573 else
5574 return r;
5575
5576 /* Reset handler not implemented, use the default method */
5577 need_full_reset =
5578 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5579 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5580
5581 /*
5582 * ASIC reset has to be done on all XGMI hive nodes ASAP
5583 	 * to allow proper link negotiation in FW (within 1 sec)
5584 */
5585 if (!skip_hw_reset && need_full_reset) {
5586 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5587 /* For XGMI run all resets in parallel to speed up the process */
5588 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5589 if (!queue_work(system_unbound_wq,
5590 &tmp_adev->xgmi_reset_work))
5591 r = -EALREADY;
5592 } else
5593 r = amdgpu_asic_reset(tmp_adev);
5594
5595 if (r) {
5596 dev_err(tmp_adev->dev,
5597 "ASIC reset failed with error, %d for drm dev, %s",
5598 r, adev_to_drm(tmp_adev)->unique);
5599 goto out;
5600 }
5601 }
5602
5603 /* For XGMI, wait for all resets to complete before proceeding */
5604 if (!r) {
5605 list_for_each_entry(tmp_adev, device_list_handle,
5606 reset_list) {
5607 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5608 flush_work(&tmp_adev->xgmi_reset_work);
5609 r = tmp_adev->asic_reset_res;
5610 if (r)
5611 break;
5612 }
5613 }
5614 }
5615 }
5616
5617 if (!r && amdgpu_ras_intr_triggered()) {
5618 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5619 amdgpu_ras_reset_error_count(tmp_adev,
5620 AMDGPU_RAS_BLOCK__MMHUB);
5621 }
5622
5623 amdgpu_ras_intr_cleared();
5624 }
5625
5626 r = amdgpu_device_reinit_after_reset(reset_context);
5627 if (r == -EAGAIN)
5628 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5629 else
5630 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5631
5632out:
5633 return r;
5634}
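
/*
 * Illustrative sketch (not part of the driver): how a caller prepares a
 * single-device list and reset context for amdgpu_do_asic_reset(),
 * mirroring what amdgpu_pci_slot_reset() below does. The flag choice is
 * an assumption for the example only.
 *
 *	struct amdgpu_reset_context reset_context;
 *	struct list_head device_list;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	INIT_LIST_HEAD(&device_list);
 *	list_add_tail(&adev->reset_list, &device_list);
 *
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_do_asic_reset(&device_list, &reset_context);
 */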
5635
5636static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5637{
5638
5639 switch (amdgpu_asic_reset_method(adev)) {
5640 case AMD_RESET_METHOD_MODE1:
5641 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5642 break;
5643 case AMD_RESET_METHOD_MODE2:
5644 adev->mp1_state = PP_MP1_STATE_RESET;
5645 break;
5646 default:
5647 adev->mp1_state = PP_MP1_STATE_NONE;
5648 break;
5649 }
5650}
5651
5652static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5653{
5654 amdgpu_vf_error_trans_all(adev);
5655 adev->mp1_state = PP_MP1_STATE_NONE;
5656}
5657
5658static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5659{
5660 struct pci_dev *p = NULL;
5661
5662 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5663 adev->pdev->bus->number, 1);
5664 if (p) {
5665 pm_runtime_enable(&(p->dev));
5666 pm_runtime_resume(&(p->dev));
5667 }
5668
5669 pci_dev_put(p);
5670}
5671
5672static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5673{
5674 enum amd_reset_method reset_method;
5675 struct pci_dev *p = NULL;
5676 u64 expires;
5677
5678 /*
5679 * For now, only BACO and mode1 reset are confirmed to suffer
5680 * from the audio issue when the audio device is not properly suspended.
5681 */
5682 reset_method = amdgpu_asic_reset_method(adev);
5683 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5684 (reset_method != AMD_RESET_METHOD_MODE1))
5685 return -EINVAL;
5686
5687 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5688 adev->pdev->bus->number, 1);
5689 if (!p)
5690 return -ENODEV;
5691
5692 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5693 if (!expires)
5694 /*
5695 * If we cannot get the audio device autosuspend delay,
5696 * fall back to a fixed 4s interval. Since 3s is the audio
5697 * controller's default autosuspend delay, the 4s used
5698 * here is guaranteed to cover it.
5699 */
5700 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5701
5702 while (!pm_runtime_status_suspended(&(p->dev))) {
5703 if (!pm_runtime_suspend(&(p->dev)))
5704 break;
5705
5706 if (expires < ktime_get_mono_fast_ns()) {
5707 dev_warn(adev->dev, "failed to suspend display audio\n");
5708 pci_dev_put(p);
5709 /* TODO: abort the succeeding gpu reset? */
5710 return -ETIMEDOUT;
5711 }
5712 }
5713
5714 pm_runtime_disable(&(p->dev));
5715
5716 pci_dev_put(p);
5717 return 0;
5718}
5719
5720static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5721{
5722 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5723
5724#if defined(CONFIG_DEBUG_FS)
5725 if (!amdgpu_sriov_vf(adev))
5726 cancel_work(&adev->reset_work);
5727#endif
5728
5729 if (adev->kfd.dev)
5730 cancel_work(&adev->kfd.reset_work);
5731
5732 if (amdgpu_sriov_vf(adev))
5733 cancel_work(&adev->virt.flr_work);
5734
5735 if (con && adev->ras_enabled)
5736 cancel_work(&con->recovery_work);
5737
5738}
5739
5740static int amdgpu_device_health_check(struct list_head *device_list_handle)
5741{
5742 struct amdgpu_device *tmp_adev;
5743 int ret = 0;
5744 u32 status;
5745
5746 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5747 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5748 if (PCI_POSSIBLE_ERROR(status)) {
5749 dev_err(tmp_adev->dev, "device lost from bus!");
5750 ret = -ENODEV;
5751 }
5752 }
5753
5754 return ret;
5755}
5756
5757/**
5758 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5759 *
5760 * @adev: amdgpu_device pointer
5761 * @job: the job that triggered the hang
5762 * @reset_context: amdgpu reset context pointer
5763 *
5764 * Attempt to reset the GPU if it has hung (all asics).
5765 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5766 * Returns 0 for success or an error on failure.
5767 */
5768
5769int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5770 struct amdgpu_job *job,
5771 struct amdgpu_reset_context *reset_context)
5772{
5773 struct list_head device_list, *device_list_handle = NULL;
5774 bool job_signaled = false;
5775 struct amdgpu_hive_info *hive = NULL;
5776 struct amdgpu_device *tmp_adev = NULL;
5777 int i, r = 0;
5778 bool need_emergency_restart = false;
5779 bool audio_suspended = false;
5780 int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5781
5782 /*
5783 * Special case: RAS triggered and full reset isn't supported
5784 */
5785 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5786
5787 /*
5788 * Flush RAM to disk so that after reboot
5789 * the user can read log and see why the system rebooted.
5790 */
5791 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5792 amdgpu_ras_get_context(adev)->reboot) {
5793 DRM_WARN("Emergency reboot.");
5794
5795 ksys_sync_helper();
5796 emergency_restart();
5797 }
5798
5799 dev_info(adev->dev, "GPU %s begin!\n",
5800 need_emergency_restart ? "jobs stop":"reset");
5801
5802 if (!amdgpu_sriov_vf(adev))
5803 hive = amdgpu_get_xgmi_hive(adev);
5804 if (hive)
5805 mutex_lock(&hive->hive_lock);
5806
5807 reset_context->job = job;
5808 reset_context->hive = hive;
5809 /*
5810 * Build list of devices to reset.
5811 * In case we are in XGMI hive mode, resort the device list
5812 * to put adev in the 1st position.
5813 */
5814 INIT_LIST_HEAD(&device_list);
5815 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5816 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5817 list_add_tail(&tmp_adev->reset_list, &device_list);
5818 if (adev->shutdown)
5819 tmp_adev->shutdown = true;
5820 }
5821 if (!list_is_first(&adev->reset_list, &device_list))
5822 list_rotate_to_front(&adev->reset_list, &device_list);
5823 device_list_handle = &device_list;
5824 } else {
5825 list_add_tail(&adev->reset_list, &device_list);
5826 device_list_handle = &device_list;
5827 }
5828
5829 if (!amdgpu_sriov_vf(adev)) {
5830 r = amdgpu_device_health_check(device_list_handle);
5831 if (r)
5832 goto end_reset;
5833 }
5834
5835 /* We need to lock reset domain only once both for XGMI and single device */
5836 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5837 reset_list);
5838 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5839
5840 /* block all schedulers and reset given job's ring */
5841 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5842
5843 amdgpu_device_set_mp1_state(tmp_adev);
5844
5845 /*
5846 * Try to put the audio codec into the suspend state
5847 * before the gpu reset starts.
5848 *
5849 * Because the power domain of the graphics device is
5850 * shared with the AZ power domain, skipping this step
5851 * could change the audio hardware behind the audio
5852 * driver's back, which would trigger audio codec
5853 * errors.
5854 */
5855 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5856 audio_suspended = true;
5857
5858 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5859
5860 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5861
5862 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5863
5864 /*
5865 * Mark the ASICs to be reset as untracked first,
5866 * then add them back after the reset has completed.
5867 */
5868 amdgpu_unregister_gpu_instance(tmp_adev);
5869
5870 drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
5871
5872 /* disable ras on ALL IPs */
5873 if (!need_emergency_restart &&
5874 amdgpu_device_ip_need_full_reset(tmp_adev))
5875 amdgpu_ras_suspend(tmp_adev);
5876
5877 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5878 struct amdgpu_ring *ring = tmp_adev->rings[i];
5879
5880 if (!amdgpu_ring_sched_ready(ring))
5881 continue;
5882
5883 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5884
5885 if (need_emergency_restart)
5886 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5887 }
5888 atomic_inc(&tmp_adev->gpu_reset_counter);
5889 }
5890
5891 if (need_emergency_restart)
5892 goto skip_sched_resume;
5893
5894 /*
5895 * Must check guilty signal here since after this point all old
5896 * HW fences are force signaled.
5897 *
5898 * job->base holds a reference to parent fence
5899 */
5900 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5901 job_signaled = true;
5902 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5903 goto skip_hw_reset;
5904 }
5905
5906retry: /* Rest of adevs pre asic reset from XGMI hive. */
5907 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5908 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5909 /* TODO: should we stop? */
5910 if (r) {
5911 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5912 r, adev_to_drm(tmp_adev)->unique);
5913 tmp_adev->asic_reset_res = r;
5914 }
5915 }
5916
5917 /* Actual ASIC resets if needed. */
5918 /* Host driver will handle XGMI hive reset for SRIOV */
5919 if (amdgpu_sriov_vf(adev)) {
5920 if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5921 dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5922 amdgpu_ras_set_fed(adev, true);
5923 set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5924 }
5925
5926 r = amdgpu_device_reset_sriov(adev, reset_context);
5927 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5928 amdgpu_virt_release_full_gpu(adev, true);
5929 goto retry;
5930 }
5931 if (r)
5932 adev->asic_reset_res = r;
5933 } else {
5934 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5935 if (r && r == -EAGAIN)
5936 goto retry;
5937 }
5938
5939 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5940 /*
5941 * Drop any pending non-scheduler resets queued before the reset is done.
5942 * Any reset scheduled after this point would be valid. Scheduler resets
5943 * were already dropped during drm_sched_stop and no new ones can come
5944 * in before drm_sched_start.
5945 */
5946 amdgpu_device_stop_pending_resets(tmp_adev);
5947 }
5948
5949skip_hw_reset:
5950
5951 /* Post ASIC reset for all devs. */
5952 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5953
5954 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5955 struct amdgpu_ring *ring = tmp_adev->rings[i];
5956
5957 if (!amdgpu_ring_sched_ready(ring))
5958 continue;
5959
5960 drm_sched_start(&ring->sched, 0);
5961 }
5962
5963 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5964 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5965
5966 if (tmp_adev->asic_reset_res)
5967 r = tmp_adev->asic_reset_res;
5968
5969 tmp_adev->asic_reset_res = 0;
5970
5971 if (r) {
5972 /* Bad news, how do we tell it to userspace?
5973 * For a RAS error, report the GPU's bad status instead of
5974 * a reset failure.
5975 */
5976 if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5977 !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5978 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
5979 atomic_read(&tmp_adev->gpu_reset_counter));
5980 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5981 } else {
5982 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5983 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5984 DRM_WARN("smart shift update failed\n");
5985 }
5986 }
5987
5988skip_sched_resume:
5989 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5990 /* unlock kfd: SRIOV would do it separately */
5991 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5992 amdgpu_amdkfd_post_reset(tmp_adev);
5993
5994 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5995 * so bring up kfd here if it was not initialized before.
5996 */
5997 if (!adev->kfd.init_complete)
5998 amdgpu_amdkfd_device_init(adev);
5999
6000 if (audio_suspended)
6001 amdgpu_device_resume_display_audio(tmp_adev);
6002
6003 amdgpu_device_unset_mp1_state(tmp_adev);
6004
6005 amdgpu_ras_set_error_query_ready(tmp_adev, true);
6006 }
6007
6008 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
6009 reset_list);
6010 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
6011
6012end_reset:
6013 if (hive) {
6014 mutex_unlock(&hive->hive_lock);
6015 amdgpu_put_xgmi_hive(hive);
6016 }
6017
6018 if (r)
6019 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
6020
6021 atomic_set(&adev->reset_domain->reset_res, r);
6022 return r;
6023}
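
/*
 * Illustrative sketch (not part of the driver): how a hang handler might
 * invoke amdgpu_device_gpu_recover(). The reset source value and the
 * cleared flag are assumptions for the example only.
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	reset_context.src = AMDGPU_RESET_SRC_JOB;
 *	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 */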
6024
6025/**
6026 * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
6027 *
6028 * @adev: amdgpu_device pointer
6029 * @speed: pointer to the speed of the link
6030 * @width: pointer to the width of the link
6031 *
6032 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6033 * first physical partner to an AMD dGPU.
6034 * This will exclude any virtual switches and links.
6035 */
6036static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6037 enum pci_bus_speed *speed,
6038 enum pcie_link_width *width)
6039{
6040 struct pci_dev *parent = adev->pdev;
6041
6042 if (!speed || !width)
6043 return;
6044
6045 *speed = PCI_SPEED_UNKNOWN;
6046 *width = PCIE_LNK_WIDTH_UNKNOWN;
6047
6048 if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6049 while ((parent = pci_upstream_bridge(parent))) {
6050 /* skip upstream/downstream switches internal to the dGPU */
6051 if (parent->vendor == PCI_VENDOR_ID_ATI)
6052 continue;
6053 *speed = pcie_get_speed_cap(parent);
6054 *width = pcie_get_width_cap(parent);
6055 break;
6056 }
6057 } else {
6058 /* use the current speeds rather than max if switching is not supported */
6059 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6060 }
6061}
6062
6063/**
6064 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
6065 *
6066 * @adev: amdgpu_device pointer
6067 *
6068 * Fetches and stores in the driver the PCIe capabilities (gen speed
6069 * and lanes) of the slot the device is in. Handles APUs and
6070 * virtualized environments where PCIe config space may not be available.
6071 */
6072static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6073{
6074 struct pci_dev *pdev;
6075 enum pci_bus_speed speed_cap, platform_speed_cap;
6076 enum pcie_link_width platform_link_width;
6077
6078 if (amdgpu_pcie_gen_cap)
6079 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6080
6081 if (amdgpu_pcie_lane_cap)
6082 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6083
6084 /* covers APUs as well */
6085 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6086 if (adev->pm.pcie_gen_mask == 0)
6087 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6088 if (adev->pm.pcie_mlw_mask == 0)
6089 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6090 return;
6091 }
6092
6093 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6094 return;
6095
6096 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6097 &platform_link_width);
6098
6099 if (adev->pm.pcie_gen_mask == 0) {
6100 /* asic caps */
6101 pdev = adev->pdev;
6102 speed_cap = pcie_get_speed_cap(pdev);
6103 if (speed_cap == PCI_SPEED_UNKNOWN) {
6104 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6105 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6106 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6107 } else {
6108 if (speed_cap == PCIE_SPEED_32_0GT)
6109 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6110 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6111 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6112 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6113 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6114 else if (speed_cap == PCIE_SPEED_16_0GT)
6115 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6116 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6117 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6118 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6119 else if (speed_cap == PCIE_SPEED_8_0GT)
6120 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6121 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6122 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6123 else if (speed_cap == PCIE_SPEED_5_0GT)
6124 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6125 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6126 else
6127 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6128 }
6129 /* platform caps */
6130 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6131 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6132 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6133 } else {
6134 if (platform_speed_cap == PCIE_SPEED_32_0GT)
6135 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6136 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6137 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6138 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6139 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6140 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6141 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6142 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6143 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6144 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6145 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6146 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6147 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6148 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6149 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6150 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6151 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6152 else
6153 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6154
6155 }
6156 }
6157 if (adev->pm.pcie_mlw_mask == 0) {
6158 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6159 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6160 } else {
6161 switch (platform_link_width) {
6162 case PCIE_LNK_X32:
6163 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6164 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6165 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6166 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6167 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6168 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6169 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6170 break;
6171 case PCIE_LNK_X16:
6172 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6173 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6174 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6175 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6176 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6177 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6178 break;
6179 case PCIE_LNK_X12:
6180 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6181 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6182 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6183 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6184 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6185 break;
6186 case PCIE_LNK_X8:
6187 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6188 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6189 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6190 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6191 break;
6192 case PCIE_LNK_X4:
6193 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6194 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6195 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6196 break;
6197 case PCIE_LNK_X2:
6198 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6199 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6200 break;
6201 case PCIE_LNK_X1:
6202 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6203 break;
6204 default:
6205 break;
6206 }
6207 }
6208 }
6209}
6210
6211/**
6212 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6213 *
6214 * @adev: amdgpu_device pointer
6215 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6216 *
6217 * Return true if @peer_adev can access (DMA) @adev through the PCIe
6218 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6219 * @peer_adev.
6220 */
6221bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6222 struct amdgpu_device *peer_adev)
6223{
6224#ifdef CONFIG_HSA_AMD_P2P
6225 bool p2p_access =
6226 !adev->gmc.xgmi.connected_to_cpu &&
6227 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6228 if (!p2p_access)
6229 dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6230 pci_name(peer_adev->pdev));
6231
6232 bool is_large_bar = adev->gmc.visible_vram_size &&
6233 adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6234 bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6235
6236 if (!p2p_addressable) {
6237 uint64_t address_mask = peer_adev->dev->dma_mask ?
6238 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6239 resource_size_t aper_limit =
6240 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6241
6242 p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6243 aper_limit & address_mask);
6244 }
6245 return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6246#else
6247 return false;
6248#endif
6249}
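
/*
 * Illustrative sketch (not part of the driver): how a caller might act on
 * the check above when deciding how to reach a peer's VRAM; the branch
 * bodies are placeholders, not real driver code.
 *
 *	if (amdgpu_device_is_peer_accessible(adev, peer_adev)) {
 *		// map peer VRAM directly through the PCIe BAR
 *	} else {
 *		// fall back to staging copies through system memory
 *	}
 */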
6250
6251int amdgpu_device_baco_enter(struct drm_device *dev)
6252{
6253 struct amdgpu_device *adev = drm_to_adev(dev);
6254 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6255
6256 if (!amdgpu_device_supports_baco(dev))
6257 return -ENOTSUPP;
6258
6259 if (ras && adev->ras_enabled &&
6260 adev->nbio.funcs->enable_doorbell_interrupt)
6261 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6262
6263 return amdgpu_dpm_baco_enter(adev);
6264}
6265
6266int amdgpu_device_baco_exit(struct drm_device *dev)
6267{
6268 struct amdgpu_device *adev = drm_to_adev(dev);
6269 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6270 int ret = 0;
6271
6272 if (!amdgpu_device_supports_baco(dev))
6273 return -ENOTSUPP;
6274
6275 ret = amdgpu_dpm_baco_exit(adev);
6276 if (ret)
6277 return ret;
6278
6279 if (ras && adev->ras_enabled &&
6280 adev->nbio.funcs->enable_doorbell_interrupt)
6281 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6282
6283 if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6284 adev->nbio.funcs->clear_doorbell_interrupt)
6285 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6286
6287 return 0;
6288}
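
/*
 * Illustrative sketch (not part of the driver): the expected pairing of
 * the BACO helpers around a runtime power transition; the surrounding
 * runtime-PM plumbing and error handling are omitted.
 *
 *	r = amdgpu_device_baco_enter(drm_dev);    (on runtime suspend)
 *	...
 *	r = amdgpu_device_baco_exit(drm_dev);     (on runtime resume)
 */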
6289
6290/**
6291 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6292 * @pdev: PCI device struct
6293 * @state: PCI channel state
6294 *
6295 * Description: Called when a PCI error is detected.
6296 *
6297 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6298 */
6299pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6300{
6301 struct drm_device *dev = pci_get_drvdata(pdev);
6302 struct amdgpu_device *adev = drm_to_adev(dev);
6303 int i;
6304
6305 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6306
6307 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6308 DRM_WARN("No support for XGMI hive yet...");
6309 return PCI_ERS_RESULT_DISCONNECT;
6310 }
6311
6312 adev->pci_channel_state = state;
6313
6314 switch (state) {
6315 case pci_channel_io_normal:
6316 return PCI_ERS_RESULT_CAN_RECOVER;
6317 /* Fatal error, prepare for slot reset */
6318 case pci_channel_io_frozen:
6319 /*
6320 * Locking adev->reset_domain->sem will prevent any external access
6321 * to GPU during PCI error recovery
6322 */
6323 amdgpu_device_lock_reset_domain(adev->reset_domain);
6324 amdgpu_device_set_mp1_state(adev);
6325
6326 /*
6327 * Block any work scheduling as we do for regular GPU reset
6328 * for the duration of the recovery
6329 */
6330 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6331 struct amdgpu_ring *ring = adev->rings[i];
6332
6333 if (!amdgpu_ring_sched_ready(ring))
6334 continue;
6335
6336 drm_sched_stop(&ring->sched, NULL);
6337 }
6338 atomic_inc(&adev->gpu_reset_counter);
6339 return PCI_ERS_RESULT_NEED_RESET;
6340 case pci_channel_io_perm_failure:
6341 /* Permanent error, prepare for device removal */
6342 return PCI_ERS_RESULT_DISCONNECT;
6343 }
6344
6345 return PCI_ERS_RESULT_NEED_RESET;
6346}
6347
6348/**
6349 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6350 * @pdev: pointer to PCI device
6351 */
6352pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6353{
6354
6355 DRM_INFO("PCI error: mmio enabled callback!!\n");
6356
6357 /* TODO - dump whatever for debugging purposes */
6358
6359 /* This is called only if amdgpu_pci_error_detected returns
6360 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6361 * works, so there is no need to reset the slot.
6362 */
6363
6364 return PCI_ERS_RESULT_RECOVERED;
6365}
6366
6367/**
6368 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6369 * @pdev: PCI device struct
6370 *
6371 * Description: This routine is called by the pci error recovery
6372 * code after the PCI slot has been reset, just before we
6373 * should resume normal operations.
6374 */
6375pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6376{
6377 struct drm_device *dev = pci_get_drvdata(pdev);
6378 struct amdgpu_device *adev = drm_to_adev(dev);
6379 int r, i;
6380 struct amdgpu_reset_context reset_context;
6381 u32 memsize;
6382 struct list_head device_list;
6383
6384 /* PCI error slot reset should be skipped during RAS recovery */
6385 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6386 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6387 amdgpu_ras_in_recovery(adev))
6388 return PCI_ERS_RESULT_RECOVERED;
6389
6390 DRM_INFO("PCI error: slot reset callback!!\n");
6391
6392 memset(&reset_context, 0, sizeof(reset_context));
6393
6394 INIT_LIST_HEAD(&device_list);
6395 list_add_tail(&adev->reset_list, &device_list);
6396
6397 /* wait for asic to come out of reset */
6398 msleep(500);
6399
6400 /* Restore PCI config space */
6401 amdgpu_device_load_pci_state(pdev);
6402
6403 /* confirm ASIC came out of reset */
6404 for (i = 0; i < adev->usec_timeout; i++) {
6405 memsize = amdgpu_asic_get_config_memsize(adev);
6406
6407 if (memsize != 0xffffffff)
6408 break;
6409 udelay(1);
6410 }
6411 if (memsize == 0xffffffff) {
6412 r = -ETIME;
6413 goto out;
6414 }
6415
6416 reset_context.method = AMD_RESET_METHOD_NONE;
6417 reset_context.reset_req_dev = adev;
6418 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6419 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6420
6421 adev->no_hw_access = true;
6422 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6423 adev->no_hw_access = false;
6424 if (r)
6425 goto out;
6426
6427 r = amdgpu_do_asic_reset(&device_list, &reset_context);
6428
6429out:
6430 if (!r) {
6431 if (amdgpu_device_cache_pci_state(adev->pdev))
6432 pci_restore_state(adev->pdev);
6433
6434 DRM_INFO("PCIe error recovery succeeded\n");
6435 } else {
6436 DRM_ERROR("PCIe error recovery failed, err:%d", r);
6437 amdgpu_device_unset_mp1_state(adev);
6438 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6439 }
6440
6441 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6442}
6443
6444/**
6445 * amdgpu_pci_resume() - resume normal ops after PCI reset
6446 * @pdev: pointer to PCI device
6447 *
6448 * Called when the error recovery driver tells us that it's
6449 * OK to resume normal operation.
6450 */
6451void amdgpu_pci_resume(struct pci_dev *pdev)
6452{
6453 struct drm_device *dev = pci_get_drvdata(pdev);
6454 struct amdgpu_device *adev = drm_to_adev(dev);
6455 int i;
6456
6457
6458 DRM_INFO("PCI error: resume callback!!\n");
6459
6460 /* Only continue execution for the case of pci_channel_io_frozen */
6461 if (adev->pci_channel_state != pci_channel_io_frozen)
6462 return;
6463
6464 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6465 struct amdgpu_ring *ring = adev->rings[i];
6466
6467 if (!amdgpu_ring_sched_ready(ring))
6468 continue;
6469
6470 drm_sched_start(&ring->sched, 0);
6471 }
6472
6473 amdgpu_device_unset_mp1_state(adev);
6474 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6475}
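
/*
 * Illustrative sketch (not part of this file): how the four PCI error
 * callbacks above are wired into a struct pci_error_handlers table; the
 * actual table lives in the driver registration code.
 *
 *	static const struct pci_error_handlers example_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 */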
6476
6477bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6478{
6479 struct drm_device *dev = pci_get_drvdata(pdev);
6480 struct amdgpu_device *adev = drm_to_adev(dev);
6481 int r;
6482
6483 if (amdgpu_sriov_vf(adev))
6484 return false;
6485
6486 r = pci_save_state(pdev);
6487 if (!r) {
6488 kfree(adev->pci_state);
6489
6490 adev->pci_state = pci_store_saved_state(pdev);
6491
6492 if (!adev->pci_state) {
6493 DRM_ERROR("Failed to store PCI saved state");
6494 return false;
6495 }
6496 } else {
6497 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6498 return false;
6499 }
6500
6501 return true;
6502}
6503
6504bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6505{
6506 struct drm_device *dev = pci_get_drvdata(pdev);
6507 struct amdgpu_device *adev = drm_to_adev(dev);
6508 int r;
6509
6510 if (!adev->pci_state)
6511 return false;
6512
6513 r = pci_load_saved_state(pdev, adev->pci_state);
6514
6515 if (!r) {
6516 pci_restore_state(pdev);
6517 } else {
6518 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6519 return false;
6520 }
6521
6522 return true;
6523}
6524
6525void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6526 struct amdgpu_ring *ring)
6527{
6528#ifdef CONFIG_X86_64
6529 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6530 return;
6531#endif
6532 if (adev->gmc.xgmi.connected_to_cpu)
6533 return;
6534
6535 if (ring && ring->funcs->emit_hdp_flush)
6536 amdgpu_ring_emit_hdp_flush(ring);
6537 else
6538 amdgpu_asic_flush_hdp(adev, ring);
6539}
6540
6541void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6542 struct amdgpu_ring *ring)
6543{
6544#ifdef CONFIG_X86_64
6545 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6546 return;
6547#endif
6548 if (adev->gmc.xgmi.connected_to_cpu)
6549 return;
6550
6551 amdgpu_asic_invalidate_hdp(adev, ring);
6552}
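
/*
 * Illustrative sketch (not part of the driver): the usual ordering around
 * the HDP helpers above; treat the direction of each barrier as an
 * assumption drawn from how they are used in this driver.
 *
 *	// CPU wrote to VRAM through the BAR and the GPU is about to read it:
 *	amdgpu_device_flush_hdp(adev, ring);
 *
 *	// GPU wrote to VRAM and the CPU is about to read it through the BAR:
 *	amdgpu_device_invalidate_hdp(adev, ring);
 */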
6553
6554int amdgpu_in_reset(struct amdgpu_device *adev)
6555{
6556 return atomic_read(&adev->reset_domain->in_gpu_reset);
6557}
6558
6559/**
6560 * amdgpu_device_halt() - bring hardware to some kind of halt state
6561 *
6562 * @adev: amdgpu_device pointer
6563 *
6564 * Bring the hardware to some kind of halt state so that no one can touch
6565 * it any more. This helps to preserve the error context when an error
6566 * occurs. Compared to a simple hang, the system will remain stable, at
6567 * least for SSH access. It should then be trivial to inspect the hardware
6568 * state and see what's going on. Implemented as follows:
6569 *
6570 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
6571 * clears all CPU mappings to device, disallows remappings through page faults
6572 * 2. amdgpu_irq_disable_all() disables all interrupts
6573 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6574 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6575 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6576 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6577 * flush any in flight DMA operations
6578 */
6579void amdgpu_device_halt(struct amdgpu_device *adev)
6580{
6581 struct pci_dev *pdev = adev->pdev;
6582 struct drm_device *ddev = adev_to_drm(adev);
6583
6584 amdgpu_xcp_dev_unplug(adev);
6585 drm_dev_unplug(ddev);
6586
6587 amdgpu_irq_disable_all(adev);
6588
6589 amdgpu_fence_driver_hw_fini(adev);
6590
6591 adev->no_hw_access = true;
6592
6593 amdgpu_device_unmap_mmio(adev);
6594
6595 pci_disable_device(pdev);
6596 pci_wait_for_pending_transaction(pdev);
6597}
6598
6599u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6600 u32 reg)
6601{
6602 unsigned long flags, address, data;
6603 u32 r;
6604
6605 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6606 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6607
6608 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6609 WREG32(address, reg * 4);
6610 (void)RREG32(address);
6611 r = RREG32(data);
6612 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6613 return r;
6614}
6615
6616void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6617 u32 reg, u32 v)
6618{
6619 unsigned long flags, address, data;
6620
6621 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6622 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6623
6624 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6625 WREG32(address, reg * 4);
6626 (void)RREG32(address);
6627 WREG32(data, v);
6628 (void)RREG32(data);
6629 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6630}
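
/*
 * Illustrative sketch (not part of the driver): a read-modify-write
 * sequence using the PCIe port accessors above; the register offset and
 * bit are hypothetical placeholders.
 *
 *	u32 tmp;
 *
 *	tmp = amdgpu_device_pcie_port_rreg(adev, example_pcie_port_reg);
 *	tmp |= EXAMPLE_ENABLE_BIT;
 *	amdgpu_device_pcie_port_wreg(adev, example_pcie_port_reg, tmp);
 */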
6631
6632/**
6633 * amdgpu_device_get_gang - return a reference to the current gang
6634 * @adev: amdgpu_device pointer
6635 *
6636 * Returns: A new reference to the current gang leader.
6637 */
6638struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6639{
6640 struct dma_fence *fence;
6641
6642 rcu_read_lock();
6643 fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6644 rcu_read_unlock();
6645 return fence;
6646}
6647
6648/**
6649 * amdgpu_device_switch_gang - switch to a new gang
6650 * @adev: amdgpu_device pointer
6651 * @gang: the gang to switch to
6652 *
6653 * Try to switch to a new gang.
6654 * Returns: NULL if we switched to the new gang or a reference to the current
6655 * gang leader.
6656 */
6657struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6658 struct dma_fence *gang)
6659{
6660 struct dma_fence *old = NULL;
6661
6662 do {
6663 dma_fence_put(old);
6664 old = amdgpu_device_get_gang(adev);
6665 if (old == gang)
6666 break;
6667
6668 if (!dma_fence_is_signaled(old))
6669 return old;
6670
6671 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6672 old, gang) != old);
6673
6674 dma_fence_put(old);
6675 return NULL;
6676}
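
/*
 * Illustrative sketch (not part of the driver): how a submitter might use
 * amdgpu_device_switch_gang() given the contract documented above, i.e.
 * retry until the previous gang leader has signaled.
 *
 *	struct dma_fence *old;
 *
 *	while ((old = amdgpu_device_switch_gang(adev, new_gang))) {
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *	}
 */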
6677
6678bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6679{
6680 switch (adev->asic_type) {
6681#ifdef CONFIG_DRM_AMDGPU_SI
6682 case CHIP_HAINAN:
6683#endif
6684 case CHIP_TOPAZ:
6685 /* chips with no display hardware */
6686 return false;
6687#ifdef CONFIG_DRM_AMDGPU_SI
6688 case CHIP_TAHITI:
6689 case CHIP_PITCAIRN:
6690 case CHIP_VERDE:
6691 case CHIP_OLAND:
6692#endif
6693#ifdef CONFIG_DRM_AMDGPU_CIK
6694 case CHIP_BONAIRE:
6695 case CHIP_HAWAII:
6696 case CHIP_KAVERI:
6697 case CHIP_KABINI:
6698 case CHIP_MULLINS:
6699#endif
6700 case CHIP_TONGA:
6701 case CHIP_FIJI:
6702 case CHIP_POLARIS10:
6703 case CHIP_POLARIS11:
6704 case CHIP_POLARIS12:
6705 case CHIP_VEGAM:
6706 case CHIP_CARRIZO:
6707 case CHIP_STONEY:
6708 /* chips with display hardware */
6709 return true;
6710 default:
6711 /* IP discovery */
6712 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6713 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6714 return false;
6715 return true;
6716 }
6717}
6718
6719uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6720 uint32_t inst, uint32_t reg_addr, char reg_name[],
6721 uint32_t expected_value, uint32_t mask)
6722{
6723 uint32_t ret = 0;
6724 uint32_t old_ = 0;
6725 uint32_t tmp_ = RREG32(reg_addr);
6726 uint32_t loop = adev->usec_timeout;
6727
6728 while ((tmp_ & (mask)) != (expected_value)) {
6729 if (old_ != tmp_) {
6730 loop = adev->usec_timeout;
6731 old_ = tmp_;
6732 } else
6733 udelay(1);
6734 tmp_ = RREG32(reg_addr);
6735 loop--;
6736 if (!loop) {
6737 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6738 inst, reg_name, (uint32_t)expected_value,
6739 (uint32_t)(tmp_ & (mask)));
6740 ret = -ETIMEDOUT;
6741 break;
6742 }
6743 }
6744 return ret;
6745}
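
/*
 * Illustrative sketch (not part of the driver): a call to the polling
 * helper above; the register offset, name, mask and expected value are
 * hypothetical placeholders.
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, example_status_reg,
 *				       "EXAMPLE_STATUS", EXAMPLE_READY_BIT,
 *				       EXAMPLE_READY_BIT);
 *	if (r)
 *		dev_err(adev->dev, "block did not become ready\n");
 */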
6746
6747ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6748{
6749 ssize_t size = 0;
6750
6751 if (!ring || !ring->adev)
6752 return size;
6753
6754 if (amdgpu_device_should_recover_gpu(ring->adev))
6755 size |= AMDGPU_RESET_TYPE_FULL;
6756
6757 if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6758 !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6759 size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6760
6761 return size;
6762}
6763
6764ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6765{
6766 ssize_t size = 0;
6767
6768 if (supported_reset == 0) {
6769 size += sysfs_emit_at(buf, size, "unsupported");
6770 size += sysfs_emit_at(buf, size, "\n");
6771 return size;
6772
6773 }
6774
6775 if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6776 size += sysfs_emit_at(buf, size, "soft ");
6777
6778 if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6779 size += sysfs_emit_at(buf, size, "queue ");
6780
6781 if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6782 size += sysfs_emit_at(buf, size, "pipe ");
6783
6784 if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6785 size += sysfs_emit_at(buf, size, "full ");
6786
6787 size += sysfs_emit_at(buf, size, "\n");
6788 return size;
6789}
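
/*
 * Illustrative sketch (not part of the driver): a sysfs show callback
 * built on amdgpu_show_reset_mask(); the attribute and the
 * supported_reset field it reads are hypothetical placeholders.
 *
 *	static ssize_t example_reset_mask_show(struct device *dev,
 *					       struct device_attribute *attr,
 *					       char *buf)
 *	{
 *		struct drm_device *ddev = dev_get_drvdata(dev);
 *		struct amdgpu_device *adev = drm_to_adev(ddev);
 *
 *		return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
 *	}
 */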