Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Unify ras block interface for each ras block

1. Define unified ops interface for each block.
2. Add ras_block_match function pointer in ops interface, each ras block can customize a special match function to identify itself.
3. Add new function amdgpu_ras_block_match_default. If a ras block doesn't define .ras_block_match, amdgpu_ras_block_match_default is executed by default to identify this ras block.
4. Define unified basic ras block data for each ras block.
5. Create dedicated amdgpu device ras block link list to manage all of the ras blocks.
6. Add new function interface amdgpu_ras_register_ras_block for each ras block to register itself with the ras controlling block.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

yipechai and committed by
Alex Deucher
6492e1b0 685fae24

+78
+2
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1091 1091 uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE]; 1092 1092 1093 1093 bool ram_is_direct_mapped; 1094 + 1095 + struct list_head ras_list; 1094 1096 }; 1095 1097 1096 1098 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3519 3519 3520 3520 INIT_LIST_HEAD(&adev->reset_list); 3521 3521 3522 + INIT_LIST_HEAD(&adev->ras_list); 3523 + 3522 3524 INIT_DELAYED_WORK(&adev->delayed_init_work, 3523 3525 amdgpu_device_delayed_init_work_handler); 3524 3526 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+46
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 866 866 } 867 867 /* feature ctl end */ 868 868 869 + int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object* block_obj, enum amdgpu_ras_block block) 870 + { 871 + if(!block_obj) 872 + return -EINVAL; 873 + 874 + if (block_obj->block == block) 875 + return 0; 876 + 877 + return -EINVAL; 878 + } 879 + 880 + static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct amdgpu_device *adev, 881 + enum amdgpu_ras_block block, uint32_t sub_block_index) 882 + { 883 + struct amdgpu_ras_block_object *obj, *tmp; 884 + 885 + if (block >= AMDGPU_RAS_BLOCK__LAST) 886 + return NULL; 887 + 888 + if (!amdgpu_ras_is_supported(adev, block)) 889 + return NULL; 890 + 891 + list_for_each_entry_safe(obj, tmp, &adev->ras_list, node) { 892 + if (obj->ras_block_match) { 893 + if (obj->ras_block_match(obj, block, sub_block_index) == 0) 894 + return obj; 895 + } else { 896 + if (amdgpu_ras_block_match_default(obj, block) == 0) 897 + return obj; 898 + } 899 + } 900 + 901 + return NULL; 902 + } 869 903 870 904 static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, 871 905 struct ras_common_if *ras_block, ··· 2810 2776 } 2811 2777 } 2812 2778 #endif 2779 + /* Register each ip ras block into amdgpu ras */ 2780 + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, 2781 + struct amdgpu_ras_block_object* ras_block_obj) 2782 + { 2783 + if (!adev || !ras_block_obj) 2784 + return -EINVAL; 2785 + 2786 + INIT_LIST_HEAD(&ras_block_obj->node); 2787 + list_add_tail(&ras_block_obj->node, &adev->ras_list); 2788 + 2789 + return 0; 2790 + }
+28
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
··· 484 484 }; 485 485 int op; 486 486 }; 487 + 488 + struct amdgpu_ras_block_object { 489 + /* block name */ 490 + char name[32]; 491 + 492 + enum amdgpu_ras_block block; 493 + 494 + uint32_t sub_block_index; 495 + 496 + /* ras block link */ 497 + struct list_head node; 498 + 499 + int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); 500 + int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); 501 + void (*ras_fini)(struct amdgpu_device *adev); 502 + const struct amdgpu_ras_block_hw_ops *hw_ops; 503 + }; 504 + 505 + struct amdgpu_ras_block_hw_ops { 506 + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); 507 + void (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status); 508 + void (*query_ras_error_status)(struct amdgpu_device *adev); 509 + void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); 510 + void (*reset_ras_error_count)(struct amdgpu_device *adev); 511 + void (*reset_ras_error_status)(struct amdgpu_device *adev); 512 + }; 513 + 487 514 /* work flow 488 515 * vbios 489 516 * 1: ras feature enable (enabled by default) ··· 694 667 695 668 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); 696 669 670 + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj); 697 671 #endif