Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22#ifndef __AMDGPU_XGMI_H__
23#define __AMDGPU_XGMI_H__
24
25#include <drm/task_barrier.h>
26#include "amdgpu_ras.h"
27
/*
 * XGMI link speed selectors.
 *
 * Each enumerator's numeric value equals the number embedded in its name
 * (16, 25, 32).
 * NOTE(review): the _GT suffix suggests GT/s; confirm against the users of
 * amdgpu_xgmi.max_speed.
 */
enum amdgpu_xgmi_link_speed {
	XGMI_SPEED_16GT = 16,
	XGMI_SPEED_25GT = 25,
	XGMI_SPEED_32GT = 32
};
33
34struct amdgpu_hive_info {
35 struct kobject kobj;
36 uint64_t hive_id;
37 struct list_head device_list;
38 struct list_head node;
39 atomic_t number_devices;
40 struct mutex hive_lock;
41 int hi_req_count;
42 struct amdgpu_device *hi_req_gpu;
43 struct task_barrier tb;
44 enum {
45 AMDGPU_XGMI_PSTATE_MIN,
46 AMDGPU_XGMI_PSTATE_MAX_VEGA20,
47 AMDGPU_XGMI_PSTATE_UNKNOWN
48 } pstate;
49
50 struct amdgpu_reset_domain *reset_domain;
51 atomic_t ras_recovery;
52 struct ras_event_manager event_mgr;
53 struct work_struct reset_on_init_work;
54 atomic_t requested_nps_mode;
55};
56
/*
 * Describes one named PCS error field: a display name plus the mask and
 * shift stored for it.
 * NOTE(review): presumably the field is extracted as
 * (reg & pcs_err_mask) >> pcs_err_shift; confirm against the code that
 * consumes these tables.
 */
struct amdgpu_pcs_ras_field {
	const char *err_name;
	uint32_t pcs_err_mask;
	uint32_t pcs_err_shift;
};
62
/**
 * enum amdgpu_xgmi_bw_mode - bandwidth range reporting mode
 * @AMDGPU_XGMI_BW_MODE_PER_LINK: range for any xgmi link
 * @AMDGPU_XGMI_BW_MODE_PER_PEER: range of max of single xgmi link to max of
 *	multiple links based on source peer
 *
 * Bandwidth range reporting comes in two modes.
 */
enum amdgpu_xgmi_bw_mode {
	AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
	AMDGPU_XGMI_BW_MODE_PER_PEER
};
73
/*
 * Unit selector for bandwidth values.
 * NOTE(review): the names suggest gigabytes vs. megabytes (per second);
 * confirm the exact unit in amdgpu_xgmi_get_bandwidth()'s implementation.
 */
enum amdgpu_xgmi_bw_unit {
	AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
	AMDGPU_XGMI_BW_UNIT_MBYTES
};
78
79struct amdgpu_xgmi_ras {
80 struct amdgpu_ras_block_object ras_block;
81};
82extern struct amdgpu_xgmi_ras xgmi_ras;
83
84struct amdgpu_xgmi {
85 /* from psp */
86 u64 node_id;
87 u64 hive_id;
88 /* fixed per family */
89 u64 node_segment_size;
90 /* physical node (0-3) */
91 unsigned physical_node_id;
92 /* number of nodes (0-4) */
93 unsigned num_physical_nodes;
94 /* gpu list in the same hive */
95 struct list_head head;
96 bool supported;
97 struct ras_common_if *ras_if;
98 bool connected_to_cpu;
99 struct amdgpu_xgmi_ras *ras;
100 enum amdgpu_xgmi_link_speed max_speed;
101 uint8_t max_width;
102};
103
/*
 * XGMI entry points taking a device and/or hive. Declarations only; the
 * implementations are not part of this header.
 */
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
			      enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
			      uint32_t *min_bw, uint32_t *max_bw);
bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
					struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
					   uint64_t addr);
bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
			   struct amdgpu_device *bo_adev);
int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
122
/*
 * NPS-mode request and link query entry points. Declarations only; the
 * implementations are not part of this header.
 */
int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
				   struct amdgpu_hive_info *hive,
				   int req_nps_mode);
int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
				int global_link_num);
int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
129
/* Declarations only; the implementations are not part of this header. */
void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
132
133#endif