/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/reboot.h>
#include <linux/syscalls.h>
#include <linux/pm_runtime.h>
#include <linux/list_sort.h>

#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "nbio_v4_3.h"
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"

#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>

static bool notifier_registered;
#endif
static const char *RAS_FS_NAME = "ras";

const char *ras_error_string[] = {
	"none",
	"parity",
	"single_correctable",
	"multi_uncorrectable",
	"poison",
};

const char *ras_block_string[] = {
	"umc",
	"sdma",
	"gfx",
	"mmhub",
	"athub",
	"pcie_bif",
	"hdp",
	"xgmi_wafl",
	"df",
	"smn",
	"sem",
	"mp0",
	"mp1",
	"fuse",
	"mca",
	"vcn",
	"jpeg",
};

const char *ras_mca_block_string[] = {
	"mca_mp0",
	"mca_mp1",
	"mca_mpio",
	"mca_iohc",
};

struct amdgpu_ras_block_list {
	/* ras block link */
	struct list_head node;

	struct amdgpu_ras_block_object *ras_obj;
};

const char *get_ras_block_str(struct ras_common_if *ras_block)
{
	if (!ras_block)
		return "NULL";

	if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
		return "OUT OF RANGE";

	if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
		return ras_mca_block_string[ras_block->sub_block_index];

	return ras_block_string[ras_block->block];
}

#define ras_block_str(_BLOCK_) \
	(((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")

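/* Map a single-bit error type flag to its name: ffs() yields the 1-based bit
 * position of the flag, which is used as the index into ras_error_string
 * above (index 0 is "none").
 */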
#define ras_err_str(i) (ras_error_string[ffs(i)])

#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)

/* inject address is 52 bits */
#define RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)

/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
#define RAS_BAD_PAGE_COVER		(100 * 1024 * 1024ULL)

enum amdgpu_ras_retire_page_reservation {
	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
	AMDGPU_RAS_RETIRE_PAGE_PENDING,
	AMDGPU_RAS_RETIRE_PAGE_FAULT,
};

atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);

static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
				uint64_t addr);
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
				uint64_t addr);
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
	struct amdgpu_device *devs[MAX_GPU_INSTANCE];
	int num_gpu;
};
static struct mce_notifier_adev_list mce_adev_list;
#endif

void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
{
	if (adev && amdgpu_ras_get_context(adev))
		amdgpu_ras_get_context(adev)->error_query_ready = ready;
}

static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
{
	if (adev && amdgpu_ras_get_context(adev))
		return amdgpu_ras_get_context(adev)->error_query_ready;

	return false;
}

static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
{
	struct ras_err_data err_data;
	struct eeprom_table_record err_rec;
	int ret;

	if ((address >= adev->gmc.mc_vram_size) ||
	    (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
		dev_warn(adev->dev,
			 "RAS WARN: input address 0x%llx is invalid.\n",
			 address);
		return -EINVAL;
	}

	if (amdgpu_ras_check_bad_page(adev, address)) {
		dev_warn(adev->dev,
			 "RAS WARN: 0x%llx has already been marked as bad page!\n",
			 address);
		return 0;
	}

	ret = amdgpu_ras_error_data_init(&err_data);
	if (ret)
		return ret;

	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
	err_data.err_addr = &err_rec;
	amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);

	if (amdgpu_bad_page_threshold != 0) {
		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
					 err_data.err_addr_cnt);
		amdgpu_ras_save_bad_pages(adev, NULL);
	}

	amdgpu_ras_error_data_fini(&err_data);

	dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
	dev_warn(adev->dev, "Clear EEPROM:\n");
	dev_warn(adev->dev, "    echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");

	return 0;
}

static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
	struct ras_query_if info = {
		.head = obj->head,
	};
	ssize_t s;
	char val[128];

	if (amdgpu_ras_query_error_status(obj->adev, &info))
		return -EINVAL;

	/* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
	if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
	    amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
	}

	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
			"ue", info.ue_count,
			"ce", info.ce_count);
	if (*pos >= s)
		return 0;

	s -= *pos;
	s = min_t(u64, s, size);

	if (copy_to_user(buf, &val[*pos], s))
		return -EINVAL;

	*pos += s;

	return s;
}

static const struct file_operations amdgpu_ras_debugfs_ops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ras_debugfs_read,
	.write = NULL,
	.llseek = default_llseek
};

static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
		*block_id = i;
		if (strcmp(name, ras_block_string[i]) == 0)
			return 0;
	}
	return -EINVAL;
}

static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
		const char __user *buf, size_t size,
		loff_t *pos, struct ras_debug_if *data)
{
	ssize_t s = min_t(u64, 64, size);
	char str[65];
	char block_name[33];
	char err[9] = "ue";
	int op = -1;
	int block_id;
	uint32_t sub_block;
	u64 address, value;
	/* default value is 0 if the mask is not set by the user */
	u32 instance_mask = 0;

	if (*pos)
		return -EINVAL;
	*pos = size;

	memset(str, 0, sizeof(str));
	memset(data, 0, sizeof(*data));

	if (copy_from_user(str, buf, s))
		return -EINVAL;

	if (sscanf(str, "disable %32s", block_name) == 1)
		op = 0;
	else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
		op = 1;
	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
		op = 2;
	else if (strstr(str, "retire_page") != NULL)
		op = 3;
	else if (str[0] && str[1] && str[2] && str[3])
		/* ASCII string, but no command matched. */
		return -EINVAL;

	if (op != -1) {
		if (op == 3) {
			if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
			    sscanf(str, "%*s %llu", &address) != 1)
				return -EINVAL;

			data->op = op;
			data->inject.address = address;

			return 0;
		}

		if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
			return -EINVAL;

		data->head.block = block_id;
		/* only ue and ce errors are supported */
		if (!memcmp("ue", err, 2))
			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
		else if (!memcmp("ce", err, 2))
			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
		else
			return -EINVAL;

		data->op = op;

		if (op == 2) {
			if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
				   &sub_block, &address, &value, &instance_mask) != 4 &&
			    sscanf(str, "%*s %*s %*s %u %llu %llu %u",
				   &sub_block, &address, &value, &instance_mask) != 4 &&
			    sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
				   &sub_block, &address, &value) != 3 &&
			    sscanf(str, "%*s %*s %*s %u %llu %llu",
				   &sub_block, &address, &value) != 3)
				return -EINVAL;
			data->head.sub_block_index = sub_block;
			data->inject.address = address;
			data->inject.value = value;
			data->inject.instance_mask = instance_mask;
		}
	} else {
		if (size < sizeof(*data))
			return -EINVAL;

		if (copy_from_user(data, buf, sizeof(*data)))
			return -EINVAL;
	}

	return 0;
}

static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
				struct ras_debug_if *data)
{
	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
	uint32_t mask, inst_mask = data->inject.instance_mask;

	/* no need to set the instance mask if there is only one instance */
	if (num_xcc <= 1 && inst_mask) {
		data->inject.instance_mask = 0;
		dev_dbg(adev->dev,
			"RAS inject mask(0x%x) isn't supported, forcing it to 0.\n",
			inst_mask);

		return;
	}

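	/* Build the mask of valid instance bits for the block; for example,
	 * four GFX XCC instances give GENMASK(3, 0) = 0xf.
	 */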
	switch (data->head.block) {
	case AMDGPU_RAS_BLOCK__GFX:
		mask = GENMASK(num_xcc - 1, 0);
		break;
	case AMDGPU_RAS_BLOCK__SDMA:
		mask = GENMASK(adev->sdma.num_instances - 1, 0);
		break;
	case AMDGPU_RAS_BLOCK__VCN:
	case AMDGPU_RAS_BLOCK__JPEG:
		mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
		break;
	default:
		mask = inst_mask;
		break;
	}

	/* remove invalid bits in instance mask */
	data->inject.instance_mask &= mask;
	if (inst_mask != data->inject.instance_mask)
		dev_dbg(adev->dev,
			"Adjust RAS inject mask 0x%x to 0x%x\n",
			inst_mask, data->inject.instance_mask);
}

/**
 * DOC: AMDGPU RAS debugfs control interface
 *
 * The control interface accepts struct ras_debug_if which has two members.
 *
 * First member: ras_debug_if::head or ras_debug_if::inject.
 *
 * head is used to indicate which IP block will be under control.
 *
 * head has four members: block, type, sub_block_index, name.
 * block: which IP will be under control.
 * type: what kind of error will be enabled/disabled/injected.
 * sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
 * name: the name of the IP.
 *
 * inject has three more members than head: address, value and mask.
 * As their names indicate, the inject operation will write the
 * value to the address.
 *
 * The second member: struct ras_debug_if::op.
 * It has three kinds of operations.
 *
 * - 0: disable RAS on the block. Take ::head as its data.
 * - 1: enable RAS on the block. Take ::head as its data.
 * - 2: inject errors on the block. Take ::inject as its data.
 *
 * How to use the interface?
 *
 * In a program
 *
 * Copy the struct ras_debug_if in your code and initialize it.
 * Write the struct to the control interface.
 *
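 * A minimal sketch of the programmatic path (assuming the struct ras_debug_if
 * layout from amdgpu_ras.h and card 0's debugfs path; the whole struct must
 * be written in a single write() at offset 0, error handling elided):
 *
 * .. code-block:: c
 *
 *	struct ras_debug_if data = {0};
 *	int fd;
 *
 *	data.head.block = AMDGPU_RAS_BLOCK__UMC;
 *	data.head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 *	data.op = 1;	/* enable RAS on the block */
 *
 *	fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
 *	if (fd >= 0) {
 *		if (write(fd, &data, sizeof(data)) != sizeof(data))
 *			perror("ras_ctrl");	/* e.g. block not supported */
 *		close(fd);
 *	}
 *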
 * From shell
 *
 * .. code-block:: bash
 *
 *	echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 *	echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 *	echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 *
 * Where N is the card you want to affect.
 *
 * "disable" requires only the block.
 * "enable" requires the block and error type.
 * "inject" requires the block, error type, address, and value.
 *
 * The block is one of: umc, sdma, gfx, etc.
 * See ras_block_string[] for details.
 *
 * The error type is one of: ue, ce, where,
 *	ue is multi-uncorrectable
 *	ce is single-correctable
 *
 * The sub-block is the sub-block index; pass 0 if there is no sub-block.
 * The address and value are hexadecimal numbers; the leading 0x is optional.
 * The mask is the instance mask; it is optional and defaults to 0x1.
 *
 * For instance,
 *
 * .. code-block:: bash
 *
 *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
 *	echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
 *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
 *
 * How to check the result of the operation?
 *
 * To check disable/enable, see "ras" features at,
 * /sys/class/drm/card[0/1/2...]/device/ras/features
 *
 * To check inject, see the corresponding error count at,
 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
 *
 * .. note::
 *	Operations are only allowed on blocks which are supported.
 *	Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
 *	to see which blocks support RAS on a particular asic.
 *
 */
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
					     const char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	struct ras_debug_if data;
	int ret = 0;

	if (!amdgpu_ras_get_error_query_ready(adev)) {
		dev_warn(adev->dev, "RAS WARN: error injection "
				"currently inaccessible\n");
		return size;
	}

	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
	if (ret)
		return ret;

	if (data.op == 3) {
		ret = amdgpu_reserve_page_direct(adev, data.inject.address);
		if (!ret)
			return size;
		else
			return ret;
	}

	if (!amdgpu_ras_is_supported(adev, data.head.block))
		return -EINVAL;

	switch (data.op) {
	case 0:
		ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
		break;
	case 1:
		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
		break;
	case 2:
		if ((data.inject.address >= adev->gmc.mc_vram_size &&
		    adev->gmc.mc_vram_size) ||
		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
			dev_warn(adev->dev, "RAS WARN: input address "
					"0x%llx is invalid.",
					data.inject.address);
			ret = -EINVAL;
			break;
		}

		/* umc ce/ue error injection for a bad page is not allowed */
		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
			dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
					"already been marked as bad!\n",
					data.inject.address);
			break;
		}

		amdgpu_ras_instance_mask_check(adev, &data);

		/* data.inject.address is an offset rather than an absolute gpu address */
		ret = amdgpu_ras_error_inject(adev, &data.inject);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (ret)
		return ret;

	return size;
}

/**
 * DOC: AMDGPU RAS debugfs EEPROM table reset interface
 *
 * Some boards contain an EEPROM which is used to persistently store a list of
 * bad pages which experience ECC errors in vram. This interface provides
 * a way to reset the EEPROM, e.g., after testing error injection.
 *
 * Usage:
 *
 * .. code-block:: bash
 *
 *	echo 1 > ../ras/ras_eeprom_reset
 *
 * will reset the EEPROM table to 0 entries.
 *
 */
static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
					       const char __user *buf,
					       size_t size, loff_t *pos)
{
	struct amdgpu_device *adev =
		(struct amdgpu_device *)file_inode(f)->i_private;
	int ret;

	ret = amdgpu_ras_eeprom_reset_table(
		&(amdgpu_ras_get_context(adev)->eeprom_control));

	if (!ret) {
		/* Something was written to the EEPROM.
		 */
		amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
		return size;
	} else {
		return ret;
	}
}

static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
	.owner = THIS_MODULE,
	.read = NULL,
	.write = amdgpu_ras_debugfs_ctrl_write,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
	.owner = THIS_MODULE,
	.read = NULL,
	.write = amdgpu_ras_debugfs_eeprom_write,
	.llseek = default_llseek
};

/**
 * DOC: AMDGPU RAS sysfs Error Count Interface
 *
 * It allows the user to read the error count for each IP block on the gpu through
 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
 *
 * It outputs multiple lines which report the uncorrected (ue) and corrected
 * (ce) error counts.
 *
 * The format of one line is below,
 *
 * [ce|ue]: count
 *
 * Example:
 *
 * .. code-block:: bash
 *
 *	ue: 0
 *	ce: 1
 *
 */
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
	struct ras_query_if info = {
		.head = obj->head,
	};

	if (!amdgpu_ras_get_error_query_ready(obj->adev))
		return sysfs_emit(buf, "Query currently inaccessible\n");

	if (amdgpu_ras_query_error_status(obj->adev, &info))
		return -EINVAL;

	if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
	    amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
	}

	return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
			  "ce", info.ce_count);
}

/* obj begin */

#define get_obj(obj) do { (obj)->use++; } while (0)
#define alive_obj(obj) ((obj)->use)
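/* The two helpers above and put_obj() below implement simple manual
 * refcounting: 'use' counts the active users of a per-block obj.
 */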

static inline void put_obj(struct ras_manager *obj)
{
	if (obj && (--obj->use == 0)) {
		list_del(&obj->node);
		amdgpu_ras_error_data_fini(&obj->err_data);
	}

	if (obj && (obj->use < 0))
		DRM_ERROR("RAS ERROR: Unbalanced obj(%s) use\n", get_ras_block_str(&obj->head));
}

/* make one obj and return it. */
static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj;

	if (!adev->ras_enabled || !con)
		return NULL;

	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
		return NULL;

	if (head->block == AMDGPU_RAS_BLOCK__MCA) {
		if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
			return NULL;

		obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
	} else
		obj = &con->objs[head->block];

	/* already exists; don't create it again */
	if (alive_obj(obj))
		return NULL;

	if (amdgpu_ras_error_data_init(&obj->err_data))
		return NULL;

	obj->head = *head;
	obj->adev = adev;
	list_add(&obj->node, &con->head);
	get_obj(obj);

	return obj;
}

/* return an obj equal to head, or the first when head is NULL */
struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj;
	int i;

	if (!adev->ras_enabled || !con)
		return NULL;

	if (head) {
		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
			return NULL;

		if (head->block == AMDGPU_RAS_BLOCK__MCA) {
			if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
				return NULL;

			obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
		} else
			obj = &con->objs[head->block];

		if (alive_obj(obj))
			return obj;
	} else {
		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
			obj = &con->objs[i];
			if (alive_obj(obj))
				return obj;
		}
	}

	return NULL;
}
/* obj end */

/* feature ctl begin */
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	return adev->ras_hw_enabled & BIT(head->block);
}

static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	return con->features & BIT(head->block);
}

/*
 * if the obj is not created yet, create it.
 * set the feature enable flag.
 */
static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
		struct ras_common_if *head, int enable)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);

	/* If the hardware does not support ras, then do not create the obj.
	 * But if the hardware does support ras, we can create the obj.
	 * The ras framework checks con->hw_supported to see if it needs to do
	 * the corresponding initialization.
	 * The IP checks con->support to see if it needs to disable ras.
	 */
	if (!amdgpu_ras_is_feature_allowed(adev, head))
		return 0;

	if (enable) {
		if (!obj) {
			obj = amdgpu_ras_create_obj(adev, head);
			if (!obj)
				return -EINVAL;
		} else {
			/* In case we created the obj somewhere else */
			get_obj(obj);
		}
		con->features |= BIT(head->block);
	} else {
		if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
			con->features &= ~BIT(head->block);
			put_obj(obj);
		}
	}

	return 0;
}

/* wrapper of psp_ras_enable_features */
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
		struct ras_common_if *head, bool enable)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	union ta_ras_cmd_input *info;
	int ret;

	if (!con)
		return -EINVAL;

	/* For non-gfx IPs, do not enable the ras feature if it is not allowed.
	 * For the gfx IP, force-issue the enable or disable ras feature
	 * command regardless of the feature support status.
	 */
	if (head->block != AMDGPU_RAS_BLOCK__GFX &&
	    !amdgpu_ras_is_feature_allowed(adev, head))
		return 0;

	/* Only enable the gfx ras feature from the host side */
	if (head->block == AMDGPU_RAS_BLOCK__GFX &&
	    !amdgpu_sriov_vf(adev) &&
	    !amdgpu_ras_intr_triggered()) {
		info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		if (!enable) {
			info->disable_features = (struct ta_ras_disable_features_input) {
				.block_id = amdgpu_ras_block_to_ta(head->block),
				.error_type = amdgpu_ras_error_to_ta(head->type),
			};
		} else {
			info->enable_features = (struct ta_ras_enable_features_input) {
				.block_id = amdgpu_ras_block_to_ta(head->block),
				.error_type = amdgpu_ras_error_to_ta(head->type),
			};
		}

		ret = psp_ras_enable_features(&adev->psp, info, enable);
		if (ret) {
			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
				enable ? "enable":"disable",
				get_ras_block_str(head),
				amdgpu_ras_is_poison_mode_supported(adev), ret);
			kfree(info);
			return ret;
		}

		kfree(info);
	}

	/* setup the obj */
	__amdgpu_ras_feature_enable(adev, head, enable);

	return 0;
}

/* Only used in device probe stage and called only once. */
int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
		struct ras_common_if *head, bool enable)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	int ret;

	if (!con)
		return -EINVAL;

	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
		if (enable) {
			/* There is no harm in issuing a ras TA cmd regardless
			 * of the current ras state.
			 * If current state == target state, it will do nothing.
			 * But sometimes it requests the driver to reset and
			 * repost with error code -EAGAIN.
			 */
			ret = amdgpu_ras_feature_enable(adev, head, 1);
			/* With an old ras TA, we might fail to enable ras.
			 * Log it and just set up the object.
			 * TODO: remove this WA in the future.
			 */
			if (ret == -EINVAL) {
				ret = __amdgpu_ras_feature_enable(adev, head, 1);
				if (!ret)
					dev_info(adev->dev,
						"RAS INFO: %s setup object\n",
						get_ras_block_str(head));
			}
		} else {
			/* setup the object then issue a ras TA disable cmd.*/
			ret = __amdgpu_ras_feature_enable(adev, head, 1);
			if (ret)
				return ret;

			/* gfx block ras disable cmd must be sent to ras-ta */
			if (head->block == AMDGPU_RAS_BLOCK__GFX)
				con->features |= BIT(head->block);

			ret = amdgpu_ras_feature_enable(adev, head, 0);

			/* clean gfx block ras features flag */
			if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
				con->features &= ~BIT(head->block);
		}
	} else
		ret = amdgpu_ras_feature_enable(adev, head, enable);

	return ret;
}

static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
		bool bypass)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj, *tmp;

	list_for_each_entry_safe(obj, tmp, &con->head, node) {
		/* bypass psp.
		 * aka just release the obj and corresponding flags
		 */
		if (bypass) {
			if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
				break;
		} else {
			if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
				break;
		}
	}

	return con->features;
}

static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
		bool bypass)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	int i;
	const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;

	for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
		struct ras_common_if head = {
			.block = i,
			.type = default_ras_type,
			.sub_block_index = 0,
		};

		if (i == AMDGPU_RAS_BLOCK__MCA)
			continue;

		if (bypass) {
			/*
			 * bypass psp. vbios enables ras for us.
			 * so just create the obj
			 */
			if (__amdgpu_ras_feature_enable(adev, &head, 1))
				break;
		} else {
			if (amdgpu_ras_feature_enable(adev, &head, 1))
				break;
		}
	}

	for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
		struct ras_common_if head = {
			.block = AMDGPU_RAS_BLOCK__MCA,
			.type = default_ras_type,
			.sub_block_index = i,
		};

		if (bypass) {
			/*
			 * bypass psp. vbios enables ras for us.
			 * so just create the obj
			 */
			if (__amdgpu_ras_feature_enable(adev, &head, 1))
				break;
		} else {
			if (amdgpu_ras_feature_enable(adev, &head, 1))
				break;
		}
	}

	return con->features;
}
/* feature ctl end */

static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
		enum amdgpu_ras_block block)
{
	if (!block_obj)
		return -EINVAL;

	if (block_obj->ras_comm.block == block)
		return 0;

	return -EINVAL;
}

static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
					enum amdgpu_ras_block block, uint32_t sub_block_index)
{
	struct amdgpu_ras_block_list *node, *tmp;
	struct amdgpu_ras_block_object *obj;

	if (block >= AMDGPU_RAS_BLOCK__LAST)
		return NULL;

	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
		if (!node->ras_obj) {
			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
			continue;
		}

		obj = node->ras_obj;
		if (obj->ras_block_match) {
			if (obj->ras_block_match(obj, block, sub_block_index) == 0)
				return obj;
		} else {
			if (amdgpu_ras_block_match_default(obj, block) == 0)
				return obj;
		}
	}

	return NULL;
}

static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
{
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int ret = 0;

	/*
	 * choose the right query method according to
	 * whether the smu supports querying error information
	 */
	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
	if (ret == -EOPNOTSUPP) {
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
			adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);

		/* umc query_ras_error_address is also responsible for clearing
		 * error status
		 */
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
			adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
	} else if (!ret) {
		if (adev->umc.ras &&
		    adev->umc.ras->ecc_info_query_ras_error_count)
			adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);

		if (adev->umc.ras &&
		    adev->umc.ras->ecc_info_query_ras_error_address)
			adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
	}
}

static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
					      struct ras_manager *ras_mgr,
					      struct ras_err_data *err_data,
					      const char *blk_name,
					      bool is_ue)
{
	struct amdgpu_smuio_mcm_config_info *mcm_info;
	struct ras_err_node *err_node;
	struct ras_err_info *err_info;

	if (is_ue) {
		for_each_ras_error(err_node, err_data) {
			err_info = &err_node->err_info;
			mcm_info = &err_info->mcm_info;
			if (err_info->ue_count) {
				dev_info(adev->dev, "socket: %d, die: %d, "
					 "%lld new uncorrectable hardware errors detected in %s block\n",
					 mcm_info->socket_id,
					 mcm_info->die_id,
					 err_info->ue_count,
					 blk_name);
			}
		}

		for_each_ras_error(err_node, &ras_mgr->err_data) {
			err_info = &err_node->err_info;
			mcm_info = &err_info->mcm_info;
			dev_info(adev->dev, "socket: %d, die: %d, "
				 "%lld uncorrectable hardware errors detected in total in %s block\n",
				 mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
		}

	} else {
		for_each_ras_error(err_node, err_data) {
			err_info = &err_node->err_info;
			mcm_info = &err_info->mcm_info;
			if (err_info->ce_count) {
				dev_info(adev->dev, "socket: %d, die: %d, "
					 "%lld new correctable hardware errors detected in %s block, "
					 "no user action is needed\n",
					 mcm_info->socket_id,
					 mcm_info->die_id,
					 err_info->ce_count,
					 blk_name);
			}
		}

		for_each_ras_error(err_node, &ras_mgr->err_data) {
			err_info = &err_node->err_info;
			mcm_info = &err_info->mcm_info;
			dev_info(adev->dev, "socket: %d, die: %d, "
				 "%lld correctable hardware errors detected in total in %s block, "
				 "no user action is needed\n",
				 mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
		}
	}
}

static inline bool err_data_has_source_info(struct ras_err_data *data)
{
	return !list_empty(&data->err_node_list);
}

static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
					     struct ras_query_if *query_if,
					     struct ras_err_data *err_data)
{
	struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
	const char *blk_name = get_ras_block_str(&query_if->head);

	if (err_data->ce_count) {
		if (err_data_has_source_info(err_data)) {
			amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false);
		} else if (!adev->aid_mask &&
			   adev->smuio.funcs &&
			   adev->smuio.funcs->get_socket_id &&
			   adev->smuio.funcs->get_die_id) {
			dev_info(adev->dev, "socket: %d, die: %d "
				 "%ld correctable hardware errors "
				 "detected in %s block, no user "
				 "action is needed.\n",
				 adev->smuio.funcs->get_socket_id(adev),
				 adev->smuio.funcs->get_die_id(adev),
				 ras_mgr->err_data.ce_count,
				 blk_name);
		} else {
			dev_info(adev->dev, "%ld correctable hardware errors "
				 "detected in %s block, no user "
				 "action is needed.\n",
				 ras_mgr->err_data.ce_count,
				 blk_name);
		}
	}

	if (err_data->ue_count) {
		if (err_data_has_source_info(err_data)) {
			amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true);
		} else if (!adev->aid_mask &&
			   adev->smuio.funcs &&
			   adev->smuio.funcs->get_socket_id &&
			   adev->smuio.funcs->get_die_id) {
			dev_info(adev->dev, "socket: %d, die: %d "
				 "%ld uncorrectable hardware errors "
				 "detected in %s block\n",
				 adev->smuio.funcs->get_socket_id(adev),
				 adev->smuio.funcs->get_die_id(adev),
				 ras_mgr->err_data.ue_count,
				 blk_name);
		} else {
			dev_info(adev->dev, "%ld uncorrectable hardware errors "
				 "detected in %s block\n",
				 ras_mgr->err_data.ue_count,
				 blk_name);
		}
	}

}

static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
{
	struct ras_err_node *err_node;
	struct ras_err_info *err_info;

	if (err_data_has_source_info(err_data)) {
		for_each_ras_error(err_node, err_data) {
			err_info = &err_node->err_info;

			amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
			amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
		}
	} else {
		/* for the legacy asic path which doesn't have error source info */
		obj->err_data.ue_count += err_data->ue_count;
		obj->err_data.ce_count += err_data->ce_count;
	}
}

static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
					struct ras_query_if *info,
					struct ras_err_data *err_data,
					unsigned int error_query_mode)
{
	enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
	struct amdgpu_ras_block_object *block_obj = NULL;

	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
		return -EINVAL;

	if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
		if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
			amdgpu_ras_get_ecc_info(adev, err_data);
		} else {
			block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
			if (!block_obj || !block_obj->hw_ops) {
				dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
					     get_ras_block_str(&info->head));
				return -EINVAL;
			}

			if (block_obj->hw_ops->query_ras_error_count)
				block_obj->hw_ops->query_ras_error_count(adev, err_data);

			if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
			    (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
			    (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
				if (block_obj->hw_ops->query_ras_error_status)
					block_obj->hw_ops->query_ras_error_status(adev);
			}
		}
	} else {
		/* FIXME: add code to check return value later */
		amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data);
		amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data);
	}

	return 0;
}

/* query/inject/cure begin */
int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
{
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
	struct ras_err_data err_data;
	unsigned int error_query_mode;
	int ret;

	if (!obj)
		return -EINVAL;

	ret = amdgpu_ras_error_data_init(&err_data);
	if (ret)
		return ret;

	if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
		ret = -EINVAL;
		goto out_fini_err_data;
	}

	ret = amdgpu_ras_query_error_status_helper(adev, info,
						   &err_data,
						   error_query_mode);
	if (ret)
		goto out_fini_err_data;

	amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);

	info->ue_count = obj->err_data.ue_count;
	info->ce_count = obj->err_data.ce_count;

	amdgpu_ras_error_generate_report(adev, info, &err_data);

out_fini_err_data:
	amdgpu_ras_error_data_fini(&err_data);

	return ret;
}

int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
		enum amdgpu_ras_block block)
{
	struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
	struct amdgpu_hive_info *hive;
	int hive_ras_recovery = 0;

	if (!block_obj || !block_obj->hw_ops) {
		dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
			     ras_block_str(block));
		return -EOPNOTSUPP;
	}

	if (!amdgpu_ras_is_supported(adev, block) ||
	    !amdgpu_ras_get_mca_debug_mode(adev))
		return -EOPNOTSUPP;

	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		hive_ras_recovery = atomic_read(&hive->ras_recovery);
		amdgpu_put_xgmi_hive(hive);
	}

	/* skip ras error reset in gpu reset */
	if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
	    hive_ras_recovery) &&
	    mca_funcs && mca_funcs->mca_set_debug_mode)
		return -EOPNOTSUPP;

	if (block_obj->hw_ops->reset_ras_error_count)
		block_obj->hw_ops->reset_ras_error_count(adev);

	return 0;
}

int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
		enum amdgpu_ras_block block)
{
	struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);

	if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
		return 0;

	if ((block == AMDGPU_RAS_BLOCK__GFX) ||
	    (block == AMDGPU_RAS_BLOCK__MMHUB)) {
		if (block_obj->hw_ops->reset_ras_error_status)
			block_obj->hw_ops->reset_ras_error_status(adev);
	}

	return 0;
}

/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
		struct ras_inject_if *info)
{
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
	struct ta_ras_trigger_error_input block_info = {
		.block_id = amdgpu_ras_block_to_ta(info->head.block),
		.inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
		.sub_block_index = info->head.sub_block_index,
		.address = info->address,
		.value = info->value,
	};
	int ret = -EINVAL;
	struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
							info->head.block,
							info->head.sub_block_index);

	/* injection on a guest isn't allowed, return success directly */
	if (amdgpu_sriov_vf(adev))
		return 0;

	if (!obj)
		return -EINVAL;

	if (!block_obj || !block_obj->hw_ops) {
		dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
			     get_ras_block_str(&info->head));
		return -EINVAL;
	}

	/* Calculate XGMI relative offset */
	if (adev->gmc.xgmi.num_physical_nodes > 1 &&
	    info->head.block != AMDGPU_RAS_BLOCK__GFX) {
		block_info.address =
			amdgpu_xgmi_get_relative_phy_addr(adev,
							  block_info.address);
	}

	if (block_obj->hw_ops->ras_error_inject) {
		if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
			ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
		else /* Special ras_error_inject is defined (e.g.: xgmi) */
			ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
						info->instance_mask);
	} else {
		/* default path */
		ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
	}

	if (ret)
		dev_err(adev->dev, "ras inject %s failed %d\n",
			get_ras_block_str(&info->head), ret);

	return ret;
}

/**
 * amdgpu_ras_query_error_count_helper -- Get error counter for a specific IP
 * @adev: pointer to AMD GPU device
 * @ce_count: pointer to an integer to be set to the count of correctable errors.
 * @ue_count: pointer to an integer to be set to the count of uncorrectable errors.
 * @query_info: pointer to ras_query_if
 *
 * Return 0 on query success or when there is nothing to do; otherwise
 * return an error on failure.
 */
static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
					       unsigned long *ce_count,
					       unsigned long *ue_count,
					       struct ras_query_if *query_info)
{
	int ret;

	if (!query_info)
		/* do nothing if query_info is not specified */
		return 0;

	ret = amdgpu_ras_query_error_status(adev, query_info);
	if (ret)
		return ret;

	*ce_count += query_info->ce_count;
	*ue_count += query_info->ue_count;

	/* some hardware/IPs support read-to-clear; no need to explicitly
	 * reset the error status after the query call */
	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
	    amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
		if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
			dev_warn(adev->dev,
				 "Failed to reset error counter and error status\n");
	}

	return 0;
}

/**
 * amdgpu_ras_query_error_count -- Get error counts of all IPs or a specific IP
 * @adev: pointer to AMD GPU device
 * @ce_count: pointer to an integer to be set to the count of correctable errors.
 * @ue_count: pointer to an integer to be set to the count of uncorrectable
 * errors.
 * @query_info: pointer to ras_query_if if the query request is only for a
 * specific ip block; if info is NULL, then the query request is for
 * all the ip blocks that support querying ras error counters/status
 *
 * If set, @ce_count or @ue_count, count and return the corresponding
 * error counts in those integer pointers. Return 0 if the device
 * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
 */
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
				 unsigned long *ce_count,
				 unsigned long *ue_count,
				 struct ras_query_if *query_info)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj;
	unsigned long ce, ue;
	int ret;

	if (!adev->ras_enabled || !con)
		return -EOPNOTSUPP;

	/* Don't count since no reporting.
	 */
	if (!ce_count && !ue_count)
		return 0;

	ce = 0;
	ue = 0;
	if (!query_info) {
		/* query all the ip blocks that support the ras query interface */
		list_for_each_entry(obj, &con->head, node) {
			struct ras_query_if info = {
				.head = obj->head,
			};

			ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
		}
	} else {
		/* query a specific ip block */
		ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
	}

	if (ret)
		return ret;

	if (ce_count)
		*ce_count = ce;

	if (ue_count)
		*ue_count = ue;

	return 0;
}
/* query/inject/cure end */


/* sysfs begin */

static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
		struct ras_badpage **bps, unsigned int *count);

static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
	switch (flags) {
	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
		return "R";
	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
		return "P";
	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
	default:
		return "F";
	}
}

/**
 * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
 *
 * It allows the user to read the bad pages of vram on the gpu through
 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
 *
 * It outputs multiple lines, and each line stands for one gpu page.
 *
 * The format of one line is below,
 * gpu pfn : gpu page size : flags
 *
 * gpu pfn and gpu page size are printed in hex format.
 * flags is one of the characters below,
 *
 * R: reserved, this gpu page is reserved and not available for use.
 *
 * P: pending for reserve, this gpu page is marked as bad, and will be
 * reserved in the next window of page_reserve.
 *
 * F: unable to reserve. This gpu page can't be reserved for some reason.
 *
 * Examples:
 *
 * .. code-block:: bash
 *
 *	0x00000001 : 0x00001000 : R
 *	0x00000002 : 0x00001000 : P
 *
 */

static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
		struct kobject *kobj, struct bin_attribute *attr,
		char *buf, loff_t ppos, size_t count)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, badpages_attr);
	struct amdgpu_device *adev = con->adev;
	const unsigned int element_size =
		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
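	/* each record is printed as one fixed-width line, so ppos and count
	 * map directly to a range of record indices (rounding start up and
	 * end down to whole records)
	 */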
	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
	unsigned int end = div64_ul(ppos + count - 1, element_size);
	ssize_t s = 0;
	struct ras_badpage *bps = NULL;
	unsigned int bps_count = 0;

	memset(buf, 0, count);

	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
		return 0;

	for (; start < end && start < bps_count; start++)
		s += scnprintf(&buf[s], element_size + 1,
				"0x%08x : 0x%08x : %1s\n",
				bps[start].bp,
				bps[start].size,
				amdgpu_ras_badpage_flags_str(bps[start].flags));

	kfree(bps);

	return s;
}

static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, features_attr);

	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}

static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, version_attr);
	return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
}

static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, schema_attr);
	return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
}

static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	if (adev->dev->kobj.sd)
		sysfs_remove_file_from_group(&adev->dev->kobj,
				&con->badpages_attr.attr,
				RAS_FS_NAME);
}

static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct attribute *attrs[] = {
		&con->features_attr.attr,
		&con->version_attr.attr,
		&con->schema_attr.attr,
		NULL
	};
	struct attribute_group group = {
		.name = RAS_FS_NAME,
		.attrs = attrs,
	};

	if (adev->dev->kobj.sd)
		sysfs_remove_group(&adev->dev->kobj, &group);

	return 0;
}

int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);

	if (!obj || obj->attr_inuse)
		return -EINVAL;

	get_obj(obj);

	snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
		"%s_err_count", head->name);

	obj->sysfs_attr = (struct device_attribute){
		.attr = {
			.name = obj->fs_data.sysfs_name,
			.mode = S_IRUGO,
		},
		.show = amdgpu_ras_sysfs_read,
	};
	sysfs_attr_init(&obj->sysfs_attr.attr);

	if (sysfs_add_file_to_group(&adev->dev->kobj,
				&obj->sysfs_attr.attr,
				RAS_FS_NAME)) {
		put_obj(obj);
		return -EINVAL;
	}

	obj->attr_inuse = 1;

	return 0;
}

int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);

	if (!obj || !obj->attr_inuse)
		return -EINVAL;

	if (adev->dev->kobj.sd)
		sysfs_remove_file_from_group(&adev->dev->kobj,
				&obj->sysfs_attr.attr,
				RAS_FS_NAME);
	obj->attr_inuse = 0;
	put_obj(obj);

	return 0;
}

static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj, *tmp;

	list_for_each_entry_safe(obj, tmp, &con->head, node) {
		amdgpu_ras_sysfs_remove(adev, &obj->head);
	}

	if (amdgpu_bad_page_threshold != 0)
		amdgpu_ras_sysfs_remove_bad_page_node(adev);

	amdgpu_ras_sysfs_remove_dev_attr_node(adev);

	return 0;
}
/* sysfs end */

/**
 * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
 *
 * Normally when there is an uncorrectable error, the driver will reset
 * the GPU to recover. However, in the event of an unrecoverable error,
 * the driver provides an interface to reboot the system automatically.
 *
 * The following file in debugfs provides that interface:
 * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
 *
 * Usage:
 *
 * .. code-block:: bash
 *
 *	echo true > .../ras/auto_reboot
 *
 */
/* debugfs begin */
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *dir;

	dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
			    &amdgpu_ras_debugfs_ctrl_ops);
	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
			    &amdgpu_ras_debugfs_eeprom_ops);
	debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
			   &con->bad_page_cnt_threshold);
	debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
	debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
	debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
	debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
			    &amdgpu_ras_debugfs_eeprom_size_ops);
	con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
						       S_IRUGO, dir, adev,
						       &amdgpu_ras_debugfs_eeprom_table_ops);
	amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);

	/*
	 * After one uncorrectable error happens, GPU recovery will usually be
	 * scheduled. But due to a known problem where GPU recovery fails to
	 * bring the GPU back, the interface below provides a direct way for
	 * the user to reboot the system automatically when an
	 * ERREVENT_ATHUB_INTERRUPT is generated; in that case the normal GPU
	 * recovery routine will never be called.
	 */
	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);

	/*
	 * User could set this not to clean up hardware's error count register
	 * of RAS IPs during ras recovery.
	 */
	debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
			    &con->disable_ras_err_cnt_harvest);
	return dir;
}

static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
				      struct ras_fs_if *head,
				      struct dentry *dir)
{
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);

	if (!obj || !dir)
		return;

	get_obj(obj);

	memcpy(obj->fs_data.debugfs_name,
			head->debugfs_name,
			sizeof(obj->fs_data.debugfs_name));

	debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
			    obj, &amdgpu_ras_debugfs_ops);
}

void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct dentry *dir;
	struct ras_manager *obj;
	struct ras_fs_if fs_info;

	/*
	 * it won't be called in resume path, no need to check
	 * suspend and gpu reset status
	 */
	if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
		return;

	dir = amdgpu_ras_debugfs_create_ctrl_node(adev);

	list_for_each_entry(obj, &con->head, node) {
		if (amdgpu_ras_is_supported(adev, obj->head.block) &&
			(obj->attr_inuse == 1)) {
			sprintf(fs_info.debugfs_name, "%s_err_inject",
					get_ras_block_str(&obj->head));
			fs_info.head = obj->head;
			amdgpu_ras_debugfs_create(adev, &fs_info, dir);
		}
	}

	amdgpu_mca_smu_debugfs_init(adev, dir);
}

/* debugfs end */

/* ras fs */
static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
		amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
		amdgpu_ras_sysfs_features_read, NULL);
static DEVICE_ATTR(version, 0444,
		amdgpu_ras_sysfs_version_show, NULL);
static DEVICE_ATTR(schema, 0444,
		amdgpu_ras_sysfs_schema_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct attribute_group group = {
		.name = RAS_FS_NAME,
	};
	struct attribute *attrs[] = {
		&con->features_attr.attr,
		&con->version_attr.attr,
		&con->schema_attr.attr,
		NULL
	};
	struct bin_attribute *bin_attrs[] = {
		NULL,
		NULL,
	};
	int r;

	group.attrs = attrs;

	/* add features entry */
	con->features_attr = dev_attr_features;
	sysfs_attr_init(attrs[0]);

	/* add version entry */
	con->version_attr = dev_attr_version;
	sysfs_attr_init(attrs[1]);

	/* add schema entry */
	con->schema_attr = dev_attr_schema;
	sysfs_attr_init(attrs[2]);

	if (amdgpu_bad_page_threshold != 0) {
		/* add bad_page_features entry */
		bin_attr_gpu_vram_bad_pages.private = NULL;
		con->badpages_attr = bin_attr_gpu_vram_bad_pages;
		bin_attrs[0] = &con->badpages_attr;
		group.bin_attrs = bin_attrs;
		sysfs_bin_attr_init(bin_attrs[0]);
	}

	r = sysfs_create_group(&adev->dev->kobj, &group);
	if (r)
		dev_err(adev->dev, "Failed to create RAS sysfs group!");

	return 0;
}

static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *con_obj, *ip_obj, *tmp;

	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
		list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
			ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
			if (ip_obj)
				put_obj(ip_obj);
		}
	}

	amdgpu_ras_sysfs_remove_all(adev);
	return 0;
}
/* ras fs end */

/* ih begin */

/* For hardware that cannot enable the bif ring for both ras_controller_irq
 * and ras_err_event_athub_irq ih cookies, the driver has to poll the status
 * register to check whether the interrupt is triggered or not, and properly
 * ack the interrupt if it is there
 */
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
{
	/* Fatal error events are handled on the host side */
	if (amdgpu_sriov_vf(adev))
		return;

	if (adev->nbio.ras &&
	    adev->nbio.ras->handle_ras_controller_intr_no_bifring)
		adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);

	if (adev->nbio.ras &&
	    adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
		adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
}

static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
				struct amdgpu_iv_entry *entry)
{
	bool poison_stat = false;
	struct amdgpu_device *adev = obj->adev;
	struct amdgpu_ras_block_object *block_obj =
		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);

	if (!block_obj)
		return;

	/* both query_poison_status and handle_poison_consumption are optional,
	 * but at least one of them should be implemented if we need a poison
	 * consumption handler
	 */
	if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
		poison_stat = block_obj->hw_ops->query_poison_status(adev);
		if (!poison_stat) {
			/* Not a poison consumption interrupt, no need to handle it */
			dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
					block_obj->ras_comm.name);

			return;
		}
	}

	amdgpu_umc_poison_handler(adev, false);

	if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);

	/* gpu reset is the fallback for failed and default cases */
	if (poison_stat) {
		dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
				block_obj->ras_comm.name);
		amdgpu_ras_reset_gpu(adev);
	} else {
		amdgpu_gfx_poison_consumption_handler(adev, entry);
	}
}
1913
1914static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
1915 struct amdgpu_iv_entry *entry)
1916{
1917 dev_info(obj->adev->dev,
1918 "Poison is created, no user action is needed.\n");
1919}
1920
1921static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
1922 struct amdgpu_iv_entry *entry)
1923{
1924 struct ras_ih_data *data = &obj->ih_data;
1925 struct ras_err_data err_data;
1926 int ret;
1927
1928 if (!data->cb)
1929 return;
1930
1931 ret = amdgpu_ras_error_data_init(&err_data);
1932 if (ret)
1933 return;
1934
1935 /* Let the IP handle its data; we may need to get the output
1936 * from the callback to update the error type/count, etc.
1937 */
1938 ret = data->cb(obj->adev, &err_data, entry);
1939 /* A UE will trigger an interrupt, and in that case
1940 * we need to do a reset to recover the whole system.
1941 * But leave it to the IP to do that recovery; here we
1942 * just dispatch the error.
1943 */
1944 if (ret == AMDGPU_RAS_SUCCESS) {
1945 /* these counts could be left as 0 if
1946 * some blocks do not count the number of errors
1947 */
1948 obj->err_data.ue_count += err_data.ue_count;
1949 obj->err_data.ce_count += err_data.ce_count;
1950 }
1951
1952 amdgpu_ras_error_data_fini(&err_data);
1953}
1954
1955static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
1956{
1957 struct ras_ih_data *data = &obj->ih_data;
1958 struct amdgpu_iv_entry entry;
1959
1960 while (data->rptr != data->wptr) {
1961 rmb();
1962 memcpy(&entry, &data->ring[data->rptr],
1963 data->element_size);
1964
1965 wmb();
1966 data->rptr = (data->aligned_element_size +
1967 data->rptr) % data->ring_size;
1968
1969 if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
1970 if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
1971 amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
1972 else
1973 amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
1974 } else {
1975 if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
1976 amdgpu_ras_interrupt_umc_handler(obj, &entry);
1977 else
1978 dev_warn(obj->adev->dev,
1979 "No RAS interrupt handler for non-UMC block with poison disabled.\n");
1980 }
1981 }
1982}
1983
1984static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
1985{
1986 struct ras_ih_data *data =
1987 container_of(work, struct ras_ih_data, ih_work);
1988 struct ras_manager *obj =
1989 container_of(data, struct ras_manager, ih_data);
1990
1991 amdgpu_ras_interrupt_handler(obj);
1992}
1993
1994int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
1995 struct ras_dispatch_if *info)
1996{
1997 struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1998 struct ras_ih_data *data;
1999
2000 if (!obj)
2001 return -EINVAL;
2002 data = &obj->ih_data;
2003 if (data->inuse == 0)
2004 return 0;
2005
2006 /* The ring may overflow: old, unprocessed entries can be overwritten */
2007 memcpy(&data->ring[data->wptr], info->entry,
2008 data->element_size);
2009
2010 wmb();
2011 data->wptr = (data->aligned_element_size +
2012 data->wptr) % data->ring_size;
2013
2014 schedule_work(&data->ih_work);
2015
2016 return 0;
2017}
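
/*
 * Illustrative sketch, not part of the driver: the dispatch/handler pair
 * above forms a single-producer, single-consumer ring. With the sizing
 * used in amdgpu_ras_interrupt_add_handler() below, the pointer update
 * wraps as shown here; the element size is a made-up example value.
 */
#if 0
	unsigned int aligned_element_size = 96;		/* example value */
	unsigned int ring_size = 64 * aligned_element_size;	/* 6144 */
	unsigned int wptr = 6048;			/* last slot */

	wptr = (aligned_element_size + wptr) % ring_size;	/* wraps to 0 */
	/* the reader advances rptr with the same formula; entries are lost
	 * once the writer laps the reader, hence the overflow note above
	 */
#endif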
2018
2019int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
2020 struct ras_common_if *head)
2021{
2022 struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
2023 struct ras_ih_data *data;
2024
2025 if (!obj)
2026 return -EINVAL;
2027
2028 data = &obj->ih_data;
2029 if (data->inuse == 0)
2030 return 0;
2031
2032 cancel_work_sync(&data->ih_work);
2033
2034 kfree(data->ring);
2035 memset(data, 0, sizeof(*data));
2036 put_obj(obj);
2037
2038 return 0;
2039}
2040
2041int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
2042 struct ras_common_if *head)
2043{
2044 struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
2045 struct ras_ih_data *data;
2046 struct amdgpu_ras_block_object *ras_obj;
2047
2048 if (!obj) {
2049 /* in case we register the IH before enabling the ras feature */
2050 obj = amdgpu_ras_create_obj(adev, head);
2051 if (!obj)
2052 return -EINVAL;
2053 } else
2054 get_obj(obj);
2055
2056 ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
2057
2058 data = &obj->ih_data;
2059 /* add the callback, etc. */
2060 *data = (struct ras_ih_data) {
2061 .inuse = 0,
2062 .cb = ras_obj->ras_cb,
2063 .element_size = sizeof(struct amdgpu_iv_entry),
2064 .rptr = 0,
2065 .wptr = 0,
2066 };
2067
2068 INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
2069
2070 data->aligned_element_size = ALIGN(data->element_size, 8);
2071 /* the ring can store 64 iv entries. */
2072 data->ring_size = 64 * data->aligned_element_size;
2073 data->ring = kmalloc(data->ring_size, GFP_KERNEL);
2074 if (!data->ring) {
2075 put_obj(obj);
2076 return -ENOMEM;
2077 }
2078
2079 /* IH is ready */
2080 data->inuse = 1;
2081
2082 return 0;
2083}
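
/*
 * Illustrative sketch, not part of the driver: the sizing math above with
 * concrete numbers. ALIGN(x, 8) rounds x up to the next multiple of 8, so
 * the ring always holds exactly 64 naturally aligned entries. The element
 * size is a made-up example value.
 */
#if 0
	unsigned int element_size = 52;			/* example value */
	unsigned int aligned = ALIGN(element_size, 8);	/* -> 56 */
	unsigned int ring_size = 64 * aligned;		/* -> 3584 bytes */
#endif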
2084
2085static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
2086{
2087 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2088 struct ras_manager *obj, *tmp;
2089
2090 list_for_each_entry_safe(obj, tmp, &con->head, node) {
2091 amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
2092 }
2093
2094 return 0;
2095}
2096/* ih end */
2097
2098/* traverse all IPs except NBIO to query error counters */
2099static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
2100{
2101 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2102 struct ras_manager *obj;
2103
2104 if (!adev->ras_enabled || !con)
2105 return;
2106
2107 list_for_each_entry(obj, &con->head, node) {
2108 struct ras_query_if info = {
2109 .head = obj->head,
2110 };
2111
2112 /*
2113 * The PCIE_BIF IP has a separate isr for the ras controller
2114 * interrupt, and the block-specific ras counter query is
2115 * done in that isr. So skip this block in the common
2116 * sync flood interrupt isr path.
2117 */
2118 if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
2119 continue;
2120
2121 /*
2122 * This is a workaround for aldebaran: skip sending the msg
2123 * to smu to get the ecc_info table, because smu temporarily
2124 * fails to handle that request.
2125 * It should be removed once smu can handle the ecc_info table.
2126 */
2127 if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
2128 (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
2129 IP_VERSION(13, 0, 2)))
2130 continue;
2131
2132 amdgpu_ras_query_error_status(adev, &info);
2133
2134 if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2135 IP_VERSION(11, 0, 2) &&
2136 amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2137 IP_VERSION(11, 0, 4) &&
2138 amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2139 IP_VERSION(13, 0, 0)) {
2140 if (amdgpu_ras_reset_error_status(adev, info.head.block))
2141 dev_warn(adev->dev, "Failed to reset error counter and error status\n");
2142 }
2143 }
2144}
2145
2146/* Parse RdRspStatus and WrRspStatus */
2147static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
2148 struct ras_query_if *info)
2149{
2150 struct amdgpu_ras_block_object *block_obj;
2151 /*
2152 * Only two blocks need to query the read/write
2153 * RspStatus at the current state
2154 */
2155 if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
2156 (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
2157 return;
2158
2159 block_obj = amdgpu_ras_get_ras_block(adev,
2160 info->head.block,
2161 info->head.sub_block_index);
2162
2163 if (!block_obj || !block_obj->hw_ops) {
2164 dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
2165 get_ras_block_str(&info->head));
2166 return;
2167 }
2168
2169 if (block_obj->hw_ops->query_ras_error_status)
2170 block_obj->hw_ops->query_ras_error_status(adev);
2171
2172}
2173
2174static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
2175{
2176 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2177 struct ras_manager *obj;
2178
2179 if (!adev->ras_enabled || !con)
2180 return;
2181
2182 list_for_each_entry(obj, &con->head, node) {
2183 struct ras_query_if info = {
2184 .head = obj->head,
2185 };
2186
2187 amdgpu_ras_error_status_query(adev, &info);
2188 }
2189}
2190
2191/* recovery begin */
2192
2193/* return 0 on success.
2194 * the caller needs to free bps.
2195 */
2196static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
2197 struct ras_badpage **bps, unsigned int *count)
2198{
2199 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2200 struct ras_err_handler_data *data;
2201 int i = 0;
2202 int ret = 0, status;
2203
2204 if (!con || !con->eh_data || !bps || !count)
2205 return -EINVAL;
2206
2207 mutex_lock(&con->recovery_lock);
2208 data = con->eh_data;
2209 if (!data || data->count == 0) {
2210 *bps = NULL;
2211 ret = -EINVAL;
2212 goto out;
2213 }
2214
2215 *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
2216 if (!*bps) {
2217 ret = -ENOMEM;
2218 goto out;
2219 }
2220
2221 for (; i < data->count; i++) {
2222 (*bps)[i] = (struct ras_badpage){
2223 .bp = data->bps[i].retired_page,
2224 .size = AMDGPU_GPU_PAGE_SIZE,
2225 .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
2226 };
2227 status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
2228 data->bps[i].retired_page);
2229 if (status == -EBUSY)
2230 (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
2231 else if (status == -ENOENT)
2232 (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
2233 }
2234
2235 *count = data->count;
2236out:
2237 mutex_unlock(&con->recovery_lock);
2238 return ret;
2239}
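
/*
 * Illustrative sketch, not part of the driver: how a caller consumes
 * amdgpu_ras_badpages_read() under the contract noted above (the caller
 * frees bps). The adev variable is assumed to be in scope.
 */
#if 0
	struct ras_badpage *bps = NULL;
	unsigned int count = 0, i;

	if (!amdgpu_ras_badpages_read(adev, &bps, &count)) {
		for (i = 0; i < count; i++)
			dev_dbg(adev->dev, "bad page 0x%x, flags %u\n",
				bps[i].bp, bps[i].flags);
		kfree(bps);	/* the caller owns the array */
	}
#endif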
2240
2241static void amdgpu_ras_do_recovery(struct work_struct *work)
2242{
2243 struct amdgpu_ras *ras =
2244 container_of(work, struct amdgpu_ras, recovery_work);
2245 struct amdgpu_device *remote_adev = NULL;
2246 struct amdgpu_device *adev = ras->adev;
2247 struct list_head device_list, *device_list_handle = NULL;
2248 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2249
2250 if (hive)
2251 atomic_set(&hive->ras_recovery, 1);
2252 if (!ras->disable_ras_err_cnt_harvest) {
2253
2254 /* Build list of devices to query RAS related errors */
2255 if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
2256 device_list_handle = &hive->device_list;
2257 } else {
2258 INIT_LIST_HEAD(&device_list);
2259 list_add_tail(&adev->gmc.xgmi.head, &device_list);
2260 device_list_handle = &device_list;
2261 }
2262
2263 list_for_each_entry(remote_adev,
2264 device_list_handle, gmc.xgmi.head) {
2265 amdgpu_ras_query_err_status(remote_adev);
2266 amdgpu_ras_log_on_err_counter(remote_adev);
2267 }
2268
2269 }
2270
2271 if (amdgpu_device_should_recover_gpu(ras->adev)) {
2272 struct amdgpu_reset_context reset_context;
2273 memset(&reset_context, 0, sizeof(reset_context));
2274
2275 reset_context.method = AMD_RESET_METHOD_NONE;
2276 reset_context.reset_req_dev = adev;
2277
2278 /* Perform full reset in fatal error mode */
2279 if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
2280 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
2281 else {
2282 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
2283
2284 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
2285 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
2286 reset_context.method = AMD_RESET_METHOD_MODE2;
2287 }
2288
2289 /* A fatal error occurred in poison mode; mode1 reset is
2290 * used to recover the gpu.
2291 */
2292 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
2293 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
2294 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
2295
2296 psp_fatal_error_recovery_quirk(&adev->psp);
2297 }
2298 }
2299
2300 amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
2301 }
2302 atomic_set(&ras->in_recovery, 0);
2303 if (hive) {
2304 atomic_set(&hive->ras_recovery, 0);
2305 amdgpu_put_xgmi_hive(hive);
2306 }
2307}
2308
2309/* alloc/realloc bps array */
2310static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
2311 struct ras_err_handler_data *data, int pages)
2312{
2313 unsigned int old_space = data->count + data->space_left;
2314 unsigned int new_space = old_space + pages;
2315 unsigned int align_space = ALIGN(new_space, 512);
2316 void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
2317
2318 if (!bps)
2319 return -ENOMEM;
2321
2322 if (data->bps) {
2323 memcpy(bps, data->bps,
2324 data->count * sizeof(*data->bps));
2325 kfree(data->bps);
2326 }
2327
2328 data->bps = bps;
2329 data->space_left += align_space - old_space;
2330 return 0;
2331}
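
/*
 * Worked example with illustrative numbers: if data->count = 300 and
 * data->space_left = 0, a request for 256 more pages gives
 * new_space = 556 and align_space = ALIGN(556, 512) = 1024, so
 * space_left becomes 1024 - 300 = 724 and the next several calls
 * need no reallocation.
 */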
2332
2333/* it deals with vram only. */
2334int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
2335 struct eeprom_table_record *bps, int pages)
2336{
2337 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2338 struct ras_err_handler_data *data;
2339 int ret = 0;
2340 uint32_t i;
2341
2342 if (!con || !con->eh_data || !bps || pages <= 0)
2343 return 0;
2344
2345 mutex_lock(&con->recovery_lock);
2346 data = con->eh_data;
2347 if (!data)
2348 goto out;
2349
2350 for (i = 0; i < pages; i++) {
2351 if (amdgpu_ras_check_bad_page_unlock(con,
2352 bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
2353 continue;
2354
2355 if (!data->space_left &&
2356 amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
2357 ret = -ENOMEM;
2358 goto out;
2359 }
2360
2361 amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
2362 bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
2363 AMDGPU_GPU_PAGE_SIZE);
2364
2365 memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
2366 data->count++;
2367 data->space_left--;
2368 }
2369out:
2370 mutex_unlock(&con->recovery_lock);
2371
2372 return ret;
2373}
2374
2375/*
2376 * write the error record array to eeprom; the function should be
2377 * protected by recovery_lock
2378 * new_cnt: newly added UE count, excluding reserved bad pages; can be NULL
2379 */
2380int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
2381 unsigned long *new_cnt)
2382{
2383 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2384 struct ras_err_handler_data *data;
2385 struct amdgpu_ras_eeprom_control *control;
2386 int save_count;
2387
2388 if (!con || !con->eh_data) {
2389 if (new_cnt)
2390 *new_cnt = 0;
2391
2392 return 0;
2393 }
2394
2395 mutex_lock(&con->recovery_lock);
2396 control = &con->eeprom_control;
2397 data = con->eh_data;
2398 save_count = data->count - control->ras_num_recs;
2399 mutex_unlock(&con->recovery_lock);
2400
2401 if (new_cnt)
2402 *new_cnt = save_count / adev->umc.retire_unit;
2403
2404 /* only new entries are saved */
2405 if (save_count > 0) {
2406 if (amdgpu_ras_eeprom_append(control,
2407 &data->bps[control->ras_num_recs],
2408 save_count)) {
2409 dev_err(adev->dev, "Failed to save EEPROM table data!\n");
2410 return -EIO;
2411 }
2412
2413 dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
2414 }
2415
2416 return 0;
2417}
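
/*
 * Worked example with illustrative numbers: if data->count = 40 pages are
 * retired, control->ras_num_recs = 24 records already sit in eeprom and
 * adev->umc.retire_unit = 8, then save_count = 16, so 16 new records are
 * appended and *new_cnt reports 16 / 8 = 2 newly added UEs.
 */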
2418
2419/*
2420 * read error record array in eeprom and reserve enough space for
2421 * storing new bad pages
2422 */
2423static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
2424{
2425 struct amdgpu_ras_eeprom_control *control =
2426 &adev->psp.ras_context.ras->eeprom_control;
2427 struct eeprom_table_record *bps;
2428 int ret;
2429
2430 /* no bad page record, skip eeprom access */
2431 if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
2432 return 0;
2433
2434 bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
2435 if (!bps)
2436 return -ENOMEM;
2437
2438 ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
2439 if (ret)
2440 dev_err(adev->dev, "Failed to load EEPROM table records!\n");
2441 else
2442 ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
2443
2444 kfree(bps);
2445 return ret;
2446}
2447
2448static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
2449 uint64_t addr)
2450{
2451 struct ras_err_handler_data *data = con->eh_data;
2452 int i;
2453
2454 addr >>= AMDGPU_GPU_PAGE_SHIFT;
2455 for (i = 0; i < data->count; i++)
2456 if (addr == data->bps[i].retired_page)
2457 return true;
2458
2459 return false;
2460}
2461
2462/*
2463 * check if an address belongs to bad page
2464 *
2465 * Note: this check is only for umc block
2466 */
2467static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
2468 uint64_t addr)
2469{
2470 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2471 bool ret = false;
2472
2473 if (!con || !con->eh_data)
2474 return ret;
2475
2476 mutex_lock(&con->recovery_lock);
2477 ret = amdgpu_ras_check_bad_page_unlock(con, addr);
2478 mutex_unlock(&con->recovery_lock);
2479 return ret;
2480}
2481
2482static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
2483 uint32_t max_count)
2484{
2485 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2486
2487 /*
2488 * Justification of the bad_page_cnt_threshold value in the ras structure
2489 *
2490 * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
2491 * in eeprom, or amdgpu_bad_page_threshold == -2; this gives two
2492 * scenarios:
2493 *
2494 * Bad page retirement enabled:
2495 * - If amdgpu_bad_page_threshold = -2,
2496 * bad_page_cnt_threshold = typical value computed by formula.
2497 *
2498 * - When the value from the user is 0 < amdgpu_bad_page_threshold <
2499 * max record length in eeprom, use it directly.
2500 *
2501 * Bad page retirement disabled:
2502 * - If amdgpu_bad_page_threshold = 0, bad page retirement
2503 * functionality is disabled, and bad_page_cnt_threshold has
2504 * no effect.
2505 */
2506
2507 if (amdgpu_bad_page_threshold < 0) {
2508 u64 val = adev->gmc.mc_vram_size;
2509
2510 do_div(val, RAS_BAD_PAGE_COVER);
2511 con->bad_page_cnt_threshold = min(lower_32_bits(val),
2512 max_count);
2513 } else {
2514 con->bad_page_cnt_threshold = min_t(int, max_count,
2515 amdgpu_bad_page_threshold);
2516 }
2517}
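
/*
 * Worked example with illustrative numbers: for a 16 GiB board with a
 * negative amdgpu_bad_page_threshold (e.g. the -2 case described above),
 * val = 16 GiB / RAS_BAD_PAGE_COVER = 163 (do_div truncates), so up to
 * 163 bad pages are tolerated unless the eeprom record limit passed in
 * as max_count is smaller.
 */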
2518
2519int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
2520{
2521 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2522 struct ras_err_handler_data **data;
2523 u32 max_eeprom_records_count = 0;
2524 bool exc_err_limit = false;
2525 int ret;
2526
2527 if (!con || amdgpu_sriov_vf(adev))
2528 return 0;
2529
2530 /* Allow access to RAS EEPROM via debugfs, when the ASIC
2531 * supports RAS and debugfs is enabled, but when
2532 * adev->ras_enabled is unset, i.e. when "ras_enable"
2533 * module parameter is set to 0.
2534 */
2535 con->adev = adev;
2536
2537 if (!adev->ras_enabled)
2538 return 0;
2539
2540 data = &con->eh_data;
2541 *data = kzalloc(sizeof(**data), GFP_KERNEL);
2542 if (!*data) {
2543 ret = -ENOMEM;
2544 goto out;
2545 }
2546
2547 mutex_init(&con->recovery_lock);
2548 INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
2549 atomic_set(&con->in_recovery, 0);
2550 con->eeprom_control.bad_channel_bitmap = 0;
2551
2552 max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
2553 amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
2554
2555 /* Todo: during testing the SMU might fail to read the eeprom through I2C
2556 * when the GPU is pending an XGMI reset at probe time
2557 * (mostly after a second bus reset), so skip it for now
2558 */
2559 if (adev->gmc.xgmi.pending_reset)
2560 return 0;
2561 ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
2562 /*
2563 * This call fails when exc_err_limit is true or
2564 * ret != 0.
2565 */
2566 if (exc_err_limit || ret)
2567 goto free;
2568
2569 if (con->eeprom_control.ras_num_recs) {
2570 ret = amdgpu_ras_load_bad_pages(adev);
2571 if (ret)
2572 goto free;
2573
2574 amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
2575
2576 if (con->update_channel_flag) {
2577 amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
2578 con->update_channel_flag = false;
2579 }
2580 }
2581
2582#ifdef CONFIG_X86_MCE_AMD
2583 if ((adev->asic_type == CHIP_ALDEBARAN) &&
2584 (adev->gmc.xgmi.connected_to_cpu))
2585 amdgpu_register_bad_pages_mca_notifier(adev);
2586#endif
2587 return 0;
2588
2589free:
2590 kfree((*data)->bps);
2591 kfree(*data);
2592 con->eh_data = NULL;
2593out:
2594 dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
2595
2596 /*
2597 * Except for the error-threshold-exceeded case, failures in this
2598 * function do not fail amdgpu driver init.
2599 */
2600 if (!exc_err_limit)
2601 ret = 0;
2602 else
2603 ret = -EINVAL;
2604
2605 return ret;
2606}
2607
2608static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
2609{
2610 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2611 struct ras_err_handler_data *data = con->eh_data;
2612
2613 /* recovery_init failed to init it, fini is useless */
2614 if (!data)
2615 return 0;
2616
2617 cancel_work_sync(&con->recovery_work);
2618
2619 mutex_lock(&con->recovery_lock);
2620 con->eh_data = NULL;
2621 kfree(data->bps);
2622 kfree(data);
2623 mutex_unlock(&con->recovery_lock);
2624
2625 return 0;
2626}
2627/* recovery end */
2628
2629static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
2630{
2631 if (amdgpu_sriov_vf(adev)) {
2632 switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
2633 case IP_VERSION(13, 0, 2):
2634 case IP_VERSION(13, 0, 6):
2635 return true;
2636 default:
2637 return false;
2638 }
2639 }
2640
2641 if (adev->asic_type == CHIP_IP_DISCOVERY) {
2642 switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
2643 case IP_VERSION(13, 0, 0):
2644 case IP_VERSION(13, 0, 6):
2645 case IP_VERSION(13, 0, 10):
2646 return true;
2647 default:
2648 return false;
2649 }
2650 }
2651
2652 return adev->asic_type == CHIP_VEGA10 ||
2653 adev->asic_type == CHIP_VEGA20 ||
2654 adev->asic_type == CHIP_ARCTURUS ||
2655 adev->asic_type == CHIP_ALDEBARAN ||
2656 adev->asic_type == CHIP_SIENNA_CICHLID;
2657}
2658
2659/*
2660 * this is a workaround for the vega20 workstation sku:
2661 * force enable gfx ras and ignore the vbios gfx ras flag,
2662 * because GC EDC can not be written
2663 */
2664static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
2665{
2666 struct atom_context *ctx = adev->mode_info.atom_context;
2667
2668 if (!ctx)
2669 return;
2670
2671 if (strnstr(ctx->vbios_pn, "D16406",
2672 sizeof(ctx->vbios_pn)) ||
2673 strnstr(ctx->vbios_pn, "D36002",
2674 sizeof(ctx->vbios_pn)))
2675 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
2676}
2677
2678/*
2679 * check the hardware's ras ability, which will be saved in hw_supported.
2680 * if the hardware does not support ras, we can skip some ras initialization
2681 * and forbid some ras operations from the IPs.
2682 * if software itself, say a boot parameter, limits the ras ability, we still
2683 * need to allow the IPs to do some limited operations, like disable. In that
2684 * case we have to initialize ras as normal, but check in each function
2685 * whether the operation is allowed.
2686 */
2687static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
2688{
2689 adev->ras_hw_enabled = adev->ras_enabled = 0;
2690
2691 if (!amdgpu_ras_asic_supported(adev))
2692 return;
2693
2694 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
2695 if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
2696 dev_info(adev->dev, "MEM ECC is active.\n");
2697 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
2698 1 << AMDGPU_RAS_BLOCK__DF);
2699 } else {
2700 dev_info(adev->dev, "MEM ECC is not present.\n");
2701 }
2702
2703 if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
2704 dev_info(adev->dev, "SRAM ECC is active.\n");
2705 if (!amdgpu_sriov_vf(adev))
2706 adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
2707 1 << AMDGPU_RAS_BLOCK__DF);
2708 else
2709 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
2710 1 << AMDGPU_RAS_BLOCK__SDMA |
2711 1 << AMDGPU_RAS_BLOCK__GFX);
2712
2713 /* VCN/JPEG RAS can be supported on both bare metal and
2714 * SRIOV environment
2715 */
2716 if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
2717 IP_VERSION(2, 6, 0) ||
2718 amdgpu_ip_version(adev, VCN_HWIP, 0) ==
2719 IP_VERSION(4, 0, 0) ||
2720 amdgpu_ip_version(adev, VCN_HWIP, 0) ==
2721 IP_VERSION(4, 0, 3))
2722 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
2723 1 << AMDGPU_RAS_BLOCK__JPEG);
2724 else
2725 adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
2726 1 << AMDGPU_RAS_BLOCK__JPEG);
2727
2728 /*
2729 * XGMI RAS is not supported if xgmi num physical nodes
2730 * is zero
2731 */
2732 if (!adev->gmc.xgmi.num_physical_nodes)
2733 adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
2734 } else {
2735 dev_info(adev->dev, "SRAM ECC is not present.\n");
2736 }
2737 } else {
2738 /* the driver only manages the RAS feature of a few IP blocks
2739 * when the GPU is connected to the CPU through XGMI */
2740 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
2741 1 << AMDGPU_RAS_BLOCK__SDMA |
2742 1 << AMDGPU_RAS_BLOCK__MMHUB);
2743 }
2744
2745 amdgpu_ras_get_quirks(adev);
2746
2747 /* hw_supported needs to be aligned with RAS block mask. */
2748 adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
2749
2750 adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
2751 adev->ras_hw_enabled & amdgpu_ras_mask;
2752}
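
/*
 * Worked example with illustrative values: on a bare-metal dGPU where the
 * vbios reports both MEM and SRAM ECC, ras_hw_enabled ends up with most
 * block bits set. Booting with the amdgpu.ras_mask=0x1 module parameter
 * then leaves only bit 0 (umc) in ras_enabled, since the final value is
 * ras_hw_enabled & amdgpu_ras_mask, or 0 when amdgpu_ras_enable == 0.
 */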
2753
2754static void amdgpu_ras_counte_dw(struct work_struct *work)
2755{
2756 struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
2757 ras_counte_delay_work.work);
2758 struct amdgpu_device *adev = con->adev;
2759 struct drm_device *dev = adev_to_drm(adev);
2760 unsigned long ce_count, ue_count;
2761 int res;
2762
2763 res = pm_runtime_get_sync(dev->dev);
2764 if (res < 0)
2765 goto Out;
2766
2767 /* Cache new values.
2768 */
2769 if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
2770 atomic_set(&con->ras_ce_count, ce_count);
2771 atomic_set(&con->ras_ue_count, ue_count);
2772 }
2773
2774 pm_runtime_mark_last_busy(dev->dev);
2775Out:
2776 pm_runtime_put_autosuspend(dev->dev);
2777}
2778
2779static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
2780{
2781 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2782 bool df_poison, umc_poison;
2783
2784 /* the poison setting has no effect on an SRIOV guest */
2785 if (amdgpu_sriov_vf(adev) || !con)
2786 return;
2787
2788 /* Init poison supported flag, the default value is false */
2789 if (adev->gmc.xgmi.connected_to_cpu ||
2790 adev->gmc.is_app_apu) {
2791 /* enabled by default when GPU is connected to CPU */
2792 con->poison_supported = true;
2793 } else if (adev->df.funcs &&
2794 adev->df.funcs->query_ras_poison_mode &&
2795 adev->umc.ras &&
2796 adev->umc.ras->query_ras_poison_mode) {
2797 df_poison =
2798 adev->df.funcs->query_ras_poison_mode(adev);
2799 umc_poison =
2800 adev->umc.ras->query_ras_poison_mode(adev);
2801
2802 /* Only when poison is set in both DF and UMC can we support it */
2803 if (df_poison && umc_poison)
2804 con->poison_supported = true;
2805 else if (df_poison != umc_poison)
2806 dev_warn(adev->dev,
2807 "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
2808 df_poison, umc_poison);
2809 }
2810}
2811
2812static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
2813{
2814 return (amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0) |
2815 AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
2816 AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
2817 AMDGPU_RAS_ERROR__PARITY;
2818}
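
/*
 * Worked example: with poison supported, the expression above evaluates to
 * POISON | SINGLE_CORRECTABLE | MULTI_UNCORRECTABLE | PARITY; without
 * poison, only the last three bits are set. The parentheses matter: '|'
 * binds tighter than '?:', so omitting them would fold the three base
 * bits into the false branch of the conditional.
 */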
2819
2820int amdgpu_ras_init(struct amdgpu_device *adev)
2821{
2822 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2823 int r;
2824
2825 if (con)
2826 return 0;
2827
2828 con = kzalloc(sizeof(struct amdgpu_ras) +
2829 sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
2830 sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
2831 GFP_KERNEL);
2832 if (!con)
2833 return -ENOMEM;
2834
2835 con->adev = adev;
2836 INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
2837 atomic_set(&con->ras_ce_count, 0);
2838 atomic_set(&con->ras_ue_count, 0);
2839
2840 con->objs = (struct ras_manager *)(con + 1);
2841
2842 amdgpu_ras_set_context(adev, con);
2843
2844 amdgpu_ras_check_supported(adev);
2845
2846 if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
2847 /* set the gfx block ras context feature for VEGA20 Gaming,
2848 * to send the ras disable cmd to the ras ta during ras late init.
2849 */
2850 if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
2851 con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
2852
2853 return 0;
2854 }
2855
2856 r = 0;
2857 goto release_con;
2858 }
2859
2860 con->update_channel_flag = false;
2861 con->features = 0;
2862 con->schema = 0;
2863 INIT_LIST_HEAD(&con->head);
2864 /* Might need to get this flag from vbios. */
2865 con->flags = RAS_DEFAULT_FLAGS;
2866
2867 /* initialize nbio ras function ahead of any other
2868 * ras functions so hardware fatal error interrupt
2869 * can be enabled as early as possible */
2870 switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
2871 case IP_VERSION(7, 4, 0):
2872 case IP_VERSION(7, 4, 1):
2873 case IP_VERSION(7, 4, 4):
2874 if (!adev->gmc.xgmi.connected_to_cpu)
2875 adev->nbio.ras = &nbio_v7_4_ras;
2876 break;
2877 case IP_VERSION(4, 3, 0):
2878 if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
2879 /* unlike other generations of nbio ras,
2880 * nbio v4_3 only supports the fatal error interrupt
2881 * to inform software that DF is frozen due to a
2882 * system fatal error event. driver should not
2883 * enable nbio ras in such a case. Instead,
2884 * check DF RAS */
2885 adev->nbio.ras = &nbio_v4_3_ras;
2886 break;
2887 case IP_VERSION(7, 9, 0):
2888 if (!adev->gmc.is_app_apu)
2889 adev->nbio.ras = &nbio_v7_9_ras;
2890 break;
2891 default:
2892 /* nbio ras is not available */
2893 break;
2894 }
2895
2896 /* nbio ras block needs to be enabled ahead of other ras blocks
2897 * to handle fatal error */
2898 r = amdgpu_nbio_ras_sw_init(adev);
2899 if (r)
2900 return r;
2901
2902 if (adev->nbio.ras &&
2903 adev->nbio.ras->init_ras_controller_interrupt) {
2904 r = adev->nbio.ras->init_ras_controller_interrupt(adev);
2905 if (r)
2906 goto release_con;
2907 }
2908
2909 if (adev->nbio.ras &&
2910 adev->nbio.ras->init_ras_err_event_athub_interrupt) {
2911 r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
2912 if (r)
2913 goto release_con;
2914 }
2915
2916 amdgpu_ras_query_poison_mode(adev);
2917
2918 /* Get RAS schema for particular SOC */
2919 con->schema = amdgpu_get_ras_schema(adev);
2920
2921 if (amdgpu_ras_fs_init(adev)) {
2922 r = -EINVAL;
2923 goto release_con;
2924 }
2925
2926 dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
2927 "hardware ability[%x] ras_mask[%x]\n",
2928 adev->ras_hw_enabled, adev->ras_enabled);
2929
2930 return 0;
2931release_con:
2932 amdgpu_ras_set_context(adev, NULL);
2933 kfree(con);
2934
2935 return r;
2936}
2937
2938int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
2939{
2940 if (adev->gmc.xgmi.connected_to_cpu ||
2941 adev->gmc.is_app_apu)
2942 return 1;
2943 return 0;
2944}
2945
2946static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
2947 struct ras_common_if *ras_block)
2948{
2949 struct ras_query_if info = {
2950 .head = *ras_block,
2951 };
2952
2953 if (!amdgpu_persistent_edc_harvesting_supported(adev))
2954 return 0;
2955
2956 if (amdgpu_ras_query_error_status(adev, &info) != 0)
2957 DRM_WARN("RAS init harvest failure\n");
2958
2959 if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
2960 DRM_WARN("RAS init harvest reset failure\n");
2961
2962 return 0;
2963}
2964
2965bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
2966{
2967 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2968
2969 if (!con)
2970 return false;
2971
2972 return con->poison_supported;
2973}
2974
2975/* helper function to handle common stuff in ip late init phase */
2976int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
2977 struct ras_common_if *ras_block)
2978{
2979 struct amdgpu_ras_block_object *ras_obj = NULL;
2980 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2981 struct ras_query_if *query_info;
2982 unsigned long ue_count, ce_count;
2983 int r;
2984
2985 /* disable RAS feature per IP block if it is not supported */
2986 if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
2987 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
2988 return 0;
2989 }
2990
2991 r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
2992 if (r) {
2993 if (adev->in_suspend || amdgpu_in_reset(adev)) {
2994 /* in resume phase, if we fail to enable ras,
2995 * clean up all ras fs nodes, and disable ras */
2996 goto cleanup;
2997 } else
2998 return r;
2999 }
3000
3001 /* check for errors on warm reset for ASICs that support persistent edc */
3002 amdgpu_persistent_edc_harvesting(adev, ras_block);
3003
3004 /* in resume phase, no need to create ras fs node */
3005 if (adev->in_suspend || amdgpu_in_reset(adev))
3006 return 0;
3007
3008 ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
3009 if (ras_obj->ras_cb || (ras_obj->hw_ops &&
3010 (ras_obj->hw_ops->query_poison_status ||
3011 ras_obj->hw_ops->handle_poison_consumption))) {
3012 r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
3013 if (r)
3014 goto cleanup;
3015 }
3016
3017 if (ras_obj->hw_ops &&
3018 (ras_obj->hw_ops->query_ras_error_count ||
3019 ras_obj->hw_ops->query_ras_error_status)) {
3020 r = amdgpu_ras_sysfs_create(adev, ras_block);
3021 if (r)
3022 goto interrupt;
3023
3024 /* Those are the cached values at init.
3025 */
3026 query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
3027 if (!query_info)
3028 return -ENOMEM;
3029 memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
3030
3031 if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
3032 atomic_set(&con->ras_ce_count, ce_count);
3033 atomic_set(&con->ras_ue_count, ue_count);
3034 }
3035
3036 kfree(query_info);
3037 }
3038
3039 return 0;
3040
3041interrupt:
3042 if (ras_obj->ras_cb)
3043 amdgpu_ras_interrupt_remove_handler(adev, ras_block);
3044cleanup:
3045 amdgpu_ras_feature_enable(adev, ras_block, 0);
3046 return r;
3047}
3048
3049static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
3050 struct ras_common_if *ras_block)
3051{
3052 return amdgpu_ras_block_late_init(adev, ras_block);
3053}
3054
3055/* helper function to remove ras fs node and interrupt handler */
3056void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
3057 struct ras_common_if *ras_block)
3058{
3059 struct amdgpu_ras_block_object *ras_obj;
3060 if (!ras_block)
3061 return;
3062
3063 amdgpu_ras_sysfs_remove(adev, ras_block);
3064
3065 ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
3066 if (ras_obj->ras_cb)
3067 amdgpu_ras_interrupt_remove_handler(adev, ras_block);
3068}
3069
3070static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
3071 struct ras_common_if *ras_block)
3072{
3073 return amdgpu_ras_block_late_fini(adev, ras_block);
3074}
3075
3076/* do some init work after IP late init as dependence.
3077 * and it runs in resume/gpu reset/booting up cases.
3078 */
3079void amdgpu_ras_resume(struct amdgpu_device *adev)
3080{
3081 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3082 struct ras_manager *obj, *tmp;
3083
3084 if (!adev->ras_enabled || !con) {
3085 /* clean the ras context for VEGA20 Gaming after sending the ras disable cmd */
3086 amdgpu_release_ras_context(adev);
3087
3088 return;
3089 }
3090
3091 if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
3092 /* Set up all other IPs which are not implemented. There is a
3093 * tricky point: an IP's actual ras error type should be
3094 * MULTI_UNCORRECTABLE, but since the driver does not handle it,
3095 * ERROR_NONE makes sense anyway.
3096 */
3097 amdgpu_ras_enable_all_features(adev, 1);
3098
3099 /* We enable ras on all hw_supported blocks, but a boot
3100 * parameter might disable some of them, and one or more IPs
3101 * may not be implemented yet. So we disable those on their behalf.
3102 */
3103 list_for_each_entry_safe(obj, tmp, &con->head, node) {
3104 if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
3105 amdgpu_ras_feature_enable(adev, &obj->head, 0);
3106 /* there should not be any reference. */
3107 WARN_ON(alive_obj(obj));
3108 }
3109 }
3110 }
3111}
3112
3113void amdgpu_ras_suspend(struct amdgpu_device *adev)
3114{
3115 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3116
3117 if (!adev->ras_enabled || !con)
3118 return;
3119
3120 amdgpu_ras_disable_all_features(adev, 0);
3121 /* Make sure all ras objects are disabled. */
3122 if (con->features)
3123 amdgpu_ras_disable_all_features(adev, 1);
3124}
3125
3126int amdgpu_ras_late_init(struct amdgpu_device *adev)
3127{
3128 struct amdgpu_ras_block_list *node, *tmp;
3129 struct amdgpu_ras_block_object *obj;
3130 int r;
3131
3132 /* Guest side doesn't need to init the ras feature */
3133 if (amdgpu_sriov_vf(adev))
3134 return 0;
3135
3136 list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
3137 if (!node->ras_obj) {
3138 dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
3139 continue;
3140 }
3141
3142 obj = node->ras_obj;
3143 if (obj->ras_late_init) {
3144 r = obj->ras_late_init(adev, &obj->ras_comm);
3145 if (r) {
3146 dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
3147 obj->ras_comm.name, r);
3148 return r;
3149 }
3150 } else
3151 amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
3152 }
3153
3154 return 0;
3155}
3156
3157/* do some fini work before IP fini, as a dependency */
3158int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
3159{
3160 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3161
3162 if (!adev->ras_enabled || !con)
3163 return 0;
3164
3165
3166 /* Need to disable ras on all IPs here before ip [hw/sw]fini */
3167 if (con->features)
3168 amdgpu_ras_disable_all_features(adev, 0);
3169 amdgpu_ras_recovery_fini(adev);
3170 return 0;
3171}
3172
3173int amdgpu_ras_fini(struct amdgpu_device *adev)
3174{
3175 struct amdgpu_ras_block_list *ras_node, *tmp;
3176 struct amdgpu_ras_block_object *obj = NULL;
3177 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3178
3179 if (!adev->ras_enabled || !con)
3180 return 0;
3181
3182 list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
3183 if (ras_node->ras_obj) {
3184 obj = ras_node->ras_obj;
3185 if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
3186 obj->ras_fini)
3187 obj->ras_fini(adev, &obj->ras_comm);
3188 else
3189 amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
3190 }
3191
3192 /* Clear ras blocks from ras_list and free ras block list node */
3193 list_del(&ras_node->node);
3194 kfree(ras_node);
3195 }
3196
3197 amdgpu_ras_fs_fini(adev);
3198 amdgpu_ras_interrupt_remove_all(adev);
3199
3200 WARN(con->features, "Feature mask is not cleared");
3201
3202 if (con->features)
3203 amdgpu_ras_disable_all_features(adev, 1);
3204
3205 cancel_delayed_work_sync(&con->ras_counte_delay_work);
3206
3207 amdgpu_ras_set_context(adev, NULL);
3208 kfree(con);
3209
3210 return 0;
3211}
3212
3213void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
3214{
3215 if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
3216 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3217
3218 dev_info(adev->dev, "uncorrectable hardware error "
3219 "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
3220
3221 ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
3222 amdgpu_ras_reset_gpu(adev);
3223 }
3224}
3225
3226bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
3227{
3228 if (adev->asic_type == CHIP_VEGA20 &&
3229 adev->pm.fw_version <= 0x283400) {
3230 return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
3231 amdgpu_ras_intr_triggered();
3232 }
3233
3234 return false;
3235}
3236
3237void amdgpu_release_ras_context(struct amdgpu_device *adev)
3238{
3239 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3240
3241 if (!con)
3242 return;
3243
3244 if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
3245 con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
3246 amdgpu_ras_set_context(adev, NULL);
3247 kfree(con);
3248 }
3249}
3250
3251#ifdef CONFIG_X86_MCE_AMD
3252static struct amdgpu_device *find_adev(uint32_t node_id)
3253{
3254 int i;
3255 struct amdgpu_device *adev = NULL;
3256
3257 for (i = 0; i < mce_adev_list.num_gpu; i++) {
3258 adev = mce_adev_list.devs[i];
3259
3260 if (adev && adev->gmc.xgmi.connected_to_cpu &&
3261 adev->gmc.xgmi.physical_node_id == node_id)
3262 break;
3263 adev = NULL;
3264 }
3265
3266 return adev;
3267}
3268
3269#define GET_MCA_IPID_GPUID(m) (((m) >> 44) & 0xF)
3270#define GET_UMC_INST(m) (((m) >> 21) & 0x7)
3271#define GET_CHAN_INDEX(m) ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
3272#define GPU_ID_OFFSET 8
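
/*
 * Worked example with a made-up register value: for
 * m->ipid = 0x0000900000603000, GET_MCA_IPID_GPUID() reads bits 47:44 = 9,
 * so gpu_id = 9 - GPU_ID_OFFSET = 1; GET_UMC_INST() reads bits 23:21 = 3;
 * GET_CHAN_INDEX() combines bits 13:12 (here 3) with bit 20 placed at
 * bit 2 (here 0), giving channel index 3.
 */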
3273
3274static int amdgpu_bad_page_notifier(struct notifier_block *nb,
3275 unsigned long val, void *data)
3276{
3277 struct mce *m = (struct mce *)data;
3278 struct amdgpu_device *adev = NULL;
3279 uint32_t gpu_id = 0;
3280 uint32_t umc_inst = 0, ch_inst = 0;
3281
3282 /*
3283 * Only process the error if it was generated in UMC_V2, which
3284 * belongs to the GPU UMCs, and it occurred in DramECC (extended
3285 * error code = 0); otherwise bail out.
3286 */
3287 if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
3288 (XEC(m->status, 0x3f) == 0x0)))
3289 return NOTIFY_DONE;
3290
3291 /*
3292 * If it is correctable error, return.
3293 */
3294 if (mce_is_correctable(m))
3295 return NOTIFY_OK;
3296
3297 /*
3298 * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
3299 */
3300 gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
3301
3302 adev = find_adev(gpu_id);
3303 if (!adev) {
3304 DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
3305 gpu_id);
3306 return NOTIFY_DONE;
3307 }
3308
3309 /*
3310 * If it is an uncorrectable error, find out the UMC instance
3311 * and channel index.
3312 */
3313 umc_inst = GET_UMC_INST(m->ipid);
3314 ch_inst = GET_CHAN_INDEX(m->ipid);
3315
3316 dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d\n",
3317 umc_inst, ch_inst);
3318
3319 if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
3320 return NOTIFY_OK;
3321 else
3322 return NOTIFY_DONE;
3323}
3324
3325static struct notifier_block amdgpu_bad_page_nb = {
3326 .notifier_call = amdgpu_bad_page_notifier,
3327 .priority = MCE_PRIO_UC,
3328};
3329
3330static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
3331{
3332 /*
3333 * Add the adev to the mce_adev_list.
3334 * During mode2 reset, amdgpu device is temporarily
3335 * removed from the mgpu_info list which can cause
3336 * page retirement to fail.
3337 * Use this list instead of mgpu_info to find the amdgpu
3338 * device on which the UMC error was reported.
3339 */
3340 mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
3341
3342 /*
3343 * Register the x86 notifier only once
3344 * with MCE subsystem.
3345 */
3346 if (!notifier_registered) {
3347 mce_register_decode_chain(&amdgpu_bad_page_nb);
3348 notifier_registered = true;
3349 }
3350}
3351#endif
3352
3353struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
3354{
3355 if (!adev)
3356 return NULL;
3357
3358 return adev->psp.ras_context.ras;
3359}
3360
3361int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
3362{
3363 if (!adev)
3364 return -EINVAL;
3365
3366 adev->psp.ras_context.ras = ras_con;
3367 return 0;
3368}
3369
3370/* check if ras is supported on block, say, sdma, gfx */
3371int amdgpu_ras_is_supported(struct amdgpu_device *adev,
3372 unsigned int block)
3373{
3374 int ret = 0;
3375 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3376
3377 if (block >= AMDGPU_RAS_BLOCK_COUNT)
3378 return 0;
3379
3380 ret = ras && (adev->ras_enabled & (1 << block));
3381
3382 /* For the special asic with mem ecc enabled but sram ecc
3383 * not enabled, even if the ras block is not marked supported
3384 * in .ras_enabled, if the asic supports poison mode and the
3385 * ras block has a ras configuration, the ras block can be
3386 * considered to support the ras function.
3387 */
3388 if (!ret &&
3389 (block == AMDGPU_RAS_BLOCK__GFX ||
3390 block == AMDGPU_RAS_BLOCK__SDMA ||
3391 block == AMDGPU_RAS_BLOCK__VCN ||
3392 block == AMDGPU_RAS_BLOCK__JPEG) &&
3393 amdgpu_ras_is_poison_mode_supported(adev) &&
3394 amdgpu_ras_get_ras_block(adev, block, 0))
3395 ret = 1;
3396
3397 return ret;
3398}
3399
3400int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
3401{
3402 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3403
3404 if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
3405 amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
3406 return 0;
3407}
3408
3409void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
3410{
3411 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3412
3413 if (con)
3414 con->is_mca_debug_mode = enable;
3415}
3416
3417bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
3418{
3419 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3420 const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
3421
3422 if (!con)
3423 return false;
3424
3425 if (mca_funcs && mca_funcs->mca_set_debug_mode)
3426 return con->is_mca_debug_mode;
3427 else
3428 return true;
3429}
3430
3431bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
3432 unsigned int *error_query_mode)
3433{
3434 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3435 const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
3436
3437 if (!con) {
3438 *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
3439 return false;
3440 }
3441
3442 if (mca_funcs && mca_funcs->mca_set_debug_mode)
3443 *error_query_mode =
3444 (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
3445 else
3446 *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
3447
3448 return true;
3449}
3450
3451/* Register each ip ras block into amdgpu ras */
3452int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
3453 struct amdgpu_ras_block_object *ras_block_obj)
3454{
3455 struct amdgpu_ras_block_list *ras_node;
3456 if (!adev || !ras_block_obj)
3457 return -EINVAL;
3458
3459 ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
3460 if (!ras_node)
3461 return -ENOMEM;
3462
3463 INIT_LIST_HEAD(&ras_node->node);
3464 ras_node->ras_obj = ras_block_obj;
3465 list_add_tail(&ras_node->node, &adev->ras_list);
3466
3467 return 0;
3468}
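
/*
 * Illustrative sketch, not part of the driver: how an IP block typically
 * registers itself during sw_init. The my_block object and its field
 * values are hypothetical; real blocks also fill in hw_ops, ras_late_init
 * and friends as seen elsewhere in this file.
 */
#if 0
	static struct amdgpu_ras_block_object my_block = {
		.ras_comm = {
			.name = "my_block",
			.block = AMDGPU_RAS_BLOCK__GFX,
			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		},
	};
	int r = amdgpu_ras_register_ras_block(adev, &my_block);

	if (r)
		dev_warn(adev->dev, "failed to register ras block\n");
#endif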
3469
3470void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
3471{
3472 if (!err_type_name)
3473 return;
3474
3475 switch (err_type) {
3476 case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
3477 sprintf(err_type_name, "correctable");
3478 break;
3479 case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
3480 sprintf(err_type_name, "uncorrectable");
3481 break;
3482 default:
3483 sprintf(err_type_name, "unknown");
3484 break;
3485 }
3486}
3487
3488bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
3489 const struct amdgpu_ras_err_status_reg_entry *reg_entry,
3490 uint32_t instance,
3491 uint32_t *memory_id)
3492{
3493 uint32_t err_status_lo_data, err_status_lo_offset;
3494
3495 if (!reg_entry)
3496 return false;
3497
3498 err_status_lo_offset =
3499 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
3500 reg_entry->seg_lo, reg_entry->reg_lo);
3501 err_status_lo_data = RREG32(err_status_lo_offset);
3502
3503 if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
3504 !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
3505 return false;
3506
3507 *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
3508
3509 return true;
3510}
3511
3512bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
3513 const struct amdgpu_ras_err_status_reg_entry *reg_entry,
3514 uint32_t instance,
3515 unsigned long *err_cnt)
3516{
3517 uint32_t err_status_hi_data, err_status_hi_offset;
3518
3519 if (!reg_entry)
3520 return false;
3521
3522 err_status_hi_offset =
3523 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
3524 reg_entry->seg_hi, reg_entry->reg_hi);
3525 err_status_hi_data = RREG32(err_status_hi_offset);
3526
3527 if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
3528 !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
3529 /* keep the check here in case we need to refer to the result later */
3530 dev_dbg(adev->dev, "Invalid err_info field\n");
3531
3532 /* read err count */
3533 *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
3534
3535 return true;
3536}
3537
3538void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
3539 const struct amdgpu_ras_err_status_reg_entry *reg_list,
3540 uint32_t reg_list_size,
3541 const struct amdgpu_ras_memory_id_entry *mem_list,
3542 uint32_t mem_list_size,
3543 uint32_t instance,
3544 uint32_t err_type,
3545 unsigned long *err_count)
3546{
3547 uint32_t memory_id;
3548 unsigned long err_cnt;
3549 char err_type_name[16];
3550 uint32_t i, j;
3551
3552 for (i = 0; i < reg_list_size; i++) {
3553 /* query memory_id from err_status_lo */
3554 if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
3555 instance, &memory_id))
3556 continue;
3557
3558 /* query err_cnt from err_status_hi */
3559 if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
3560 instance, &err_cnt) ||
3561 !err_cnt)
3562 continue;
3563
3564 *err_count += err_cnt;
3565
3566 /* log the errors */
3567 amdgpu_ras_get_error_type_name(err_type, err_type_name);
3568 if (!mem_list) {
3569 /* memory_list is not supported */
3570 dev_info(adev->dev,
3571 "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
3572 err_cnt, err_type_name,
3573 reg_list[i].block_name,
3574 instance, memory_id);
3575 } else {
3576 for (j = 0; j < mem_list_size; j++) {
3577 if (memory_id == mem_list[j].memory_id) {
3578 dev_info(adev->dev,
3579 "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
3580 err_cnt, err_type_name,
3581 reg_list[i].block_name,
3582 instance, mem_list[j].name);
3583 break;
3584 }
3585 }
3586 }
3587 }
3588}
3589
3590void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
3591 const struct amdgpu_ras_err_status_reg_entry *reg_list,
3592 uint32_t reg_list_size,
3593 uint32_t instance)
3594{
3595 uint32_t err_status_lo_offset, err_status_hi_offset;
3596 uint32_t i;
3597
3598 for (i = 0; i < reg_list_size; i++) {
3599 err_status_lo_offset =
3600 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
3601 reg_list[i].seg_lo, reg_list[i].reg_lo);
3602 err_status_hi_offset =
3603 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
3604 reg_list[i].seg_hi, reg_list[i].reg_hi);
3605 WREG32(err_status_lo_offset, 0);
3606 WREG32(err_status_hi_offset, 0);
3607 }
3608}
3609
3610int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
3611{
3612 memset(err_data, 0, sizeof(*err_data));
3613
3614 INIT_LIST_HEAD(&err_data->err_node_list);
3615
3616 return 0;
3617}
3618
3619static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
3620{
3621 if (!err_node)
3622 return;
3623
3624 list_del(&err_node->node);
3625 kvfree(err_node);
3626}
3627
3628void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
3629{
3630 struct ras_err_node *err_node, *tmp;
3631
3632 list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
3633 amdgpu_ras_error_node_release(err_node);
3634}
3635
3636static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
3637 struct amdgpu_smuio_mcm_config_info *mcm_info)
3638{
3639 struct ras_err_node *err_node;
3640 struct amdgpu_smuio_mcm_config_info *ref_id;
3641
3642 if (!err_data || !mcm_info)
3643 return NULL;
3644
3645 for_each_ras_error(err_node, err_data) {
3646 ref_id = &err_node->err_info.mcm_info;
3647
3648 if (mcm_info->socket_id == ref_id->socket_id &&
3649 mcm_info->die_id == ref_id->die_id)
3650 return err_node;
3651 }
3652
3653 return NULL;
3654}
3655
3656static struct ras_err_node *amdgpu_ras_error_node_new(void)
3657{
3658 struct ras_err_node *err_node;
3659
3660 err_node = kvzalloc(sizeof(*err_node), GFP_KERNEL);
3661 if (!err_node)
3662 return NULL;
3663
3664 INIT_LIST_HEAD(&err_node->node);
3665
3666 return err_node;
3667}
3668
3669static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
3670{
3671 struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
3672 struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
3673 struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
3674 struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
3675
3676 if (unlikely(infoa->socket_id != infob->socket_id))
3677 return infoa->socket_id - infob->socket_id;
3678
3679 return infoa->die_id - infob->die_id;
3682}
3683
3684static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
3685 struct amdgpu_smuio_mcm_config_info *mcm_info)
3686{
3687 struct ras_err_node *err_node;
3688
3689 err_node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
3690 if (err_node)
3691 return &err_node->err_info;
3692
3693 err_node = amdgpu_ras_error_node_new();
3694 if (!err_node)
3695 return NULL;
3696
3697 memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
3698
3699 err_data->err_list_count++;
3700 list_add_tail(&err_node->node, &err_data->err_node_list);
3701 list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
3702
3703 return &err_node->err_info;
3704}
3705
3706int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
3707 struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
3708{
3709 struct ras_err_info *err_info;
3710
3711 if (!err_data || !mcm_info)
3712 return -EINVAL;
3713
3714 if (!count)
3715 return 0;
3716
3717 err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
3718 if (!err_info)
3719 return -EINVAL;
3720
3721 err_info->ue_count += count;
3722 err_data->ue_count += count;
3723
3724 return 0;
3725}
3726
3727int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
3728 struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
3729{
3730 struct ras_err_info *err_info;
3731
3732 if (!err_data || !mcm_info)
3733 return -EINVAL;
3734
3735 if (!count)
3736 return 0;
3737
3738 err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
3739 if (!err_info)
3740 return -EINVAL;
3741
3742 err_info->ce_count += count;
3743 err_data->ce_count += count;
3744
3745 return 0;
3746}
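
/*
 * Illustrative sketch, not part of the driver: the intended life cycle of
 * a ras_err_data object with the helpers above. The socket/die ids and
 * the counts are made-up values.
 */
#if 0
	struct ras_err_data err_data;
	struct amdgpu_smuio_mcm_config_info mcm_info = {
		.socket_id = 0,
		.die_id = 1,
	};

	if (!amdgpu_ras_error_data_init(&err_data)) {
		/* updates both the per-die node and the overall totals */
		amdgpu_ras_error_statistic_ce_count(&err_data, &mcm_info, 5);
		amdgpu_ras_error_statistic_ue_count(&err_data, &mcm_info, 1);
		amdgpu_ras_error_data_fini(&err_data);
	}
#endif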