Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add support for more per-process flags

Add support for more per-process flags, starting with an option to
configure MFMA precision for gfx 9.5.

v2: Change flag name to KFD_PROC_FLAG_MFMA_HIGH_PRECISION.
    Remove unused else condition.
v3: Bump the KFD API version.
v4: Add the missing SH_MEM_CONFIG__PRECISION_MODE__SHIFT define.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Harish Kasiviswanathan and committed by Alex Deucher
cf6d949a 61972cd9

+47 -19
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 606 606 default_policy, 607 607 alternate_policy, 608 608 (void __user *)args->alternate_aperture_base, 609 - args->alternate_aperture_size)) 609 + args->alternate_aperture_size, 610 + args->misc_process_flag)) 610 611 err = -EINVAL; 611 612 612 613 out:
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 2596 2596 enum cache_policy default_policy, 2597 2597 enum cache_policy alternate_policy, 2598 2598 void __user *alternate_aperture_base, 2599 - uint64_t alternate_aperture_size) 2599 + uint64_t alternate_aperture_size, 2600 + u32 misc_process_properties) 2600 2601 { 2601 2602 bool retval = true; 2602 2603 ··· 2612 2611 default_policy, 2613 2612 alternate_policy, 2614 2613 alternate_aperture_base, 2615 - alternate_aperture_size); 2614 + alternate_aperture_size, 2615 + misc_process_properties); 2616 2616 2617 2617 if (retval) 2618 2618 goto out;
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 174 174 enum cache_policy default_policy, 175 175 enum cache_policy alternate_policy, 176 176 void __user *alternate_aperture_base, 177 - uint64_t alternate_aperture_size); 177 + uint64_t alternate_aperture_size, 178 + u32 misc_process_properties); 178 179 179 180 int (*process_termination)(struct device_queue_manager *dqm, 180 181 struct qcm_process_device *qpd); ··· 211 210 enum cache_policy default_policy, 212 211 enum cache_policy alternate_policy, 213 212 void __user *alternate_aperture_base, 214 - uint64_t alternate_aperture_size); 213 + uint64_t alternate_aperture_size, 214 + u32 misc_process_properties); 215 215 void (*init_sdma_vm)(struct device_queue_manager *dqm, 216 216 struct queue *q, 217 217 struct qcm_process_device *qpd);
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
··· 40 40 enum cache_policy default_policy, 41 41 enum cache_policy alternate_policy, 42 42 void __user *alternate_aperture_base, 43 - uint64_t alternate_aperture_size); 43 + uint64_t alternate_aperture_size, 44 + u32 misc_process_properties); 44 45 static int update_qpd_cik(struct device_queue_manager *dqm, 45 46 struct qcm_process_device *qpd); 46 47 static void init_sdma_vm(struct device_queue_manager *dqm, ··· 89 88 enum cache_policy default_policy, 90 89 enum cache_policy alternate_policy, 91 90 void __user *alternate_aperture_base, 92 - uint64_t alternate_aperture_size) 91 + uint64_t alternate_aperture_size, 92 + u32 misc_process_properties) 93 93 { 94 94 uint32_t default_mtype; 95 95 uint32_t ape1_mtype;
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
··· 36 36 enum cache_policy default_policy, 37 37 enum cache_policy alternate_policy, 38 38 void __user *alternate_aperture_base, 39 - uint64_t alternate_aperture_size); 39 + uint64_t alternate_aperture_size, 40 + u32 misc_process_properties); 40 41 41 42 void device_queue_manager_init_v10( 42 43 struct device_queue_manager_asic_ops *asic_ops) ··· 62 61 enum cache_policy default_policy, 63 62 enum cache_policy alternate_policy, 64 63 void __user *alternate_aperture_base, 65 - uint64_t alternate_aperture_size) 64 + uint64_t alternate_aperture_size, 65 + u32 misc_process_properties) 66 66 { 67 67 qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << 68 68 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
··· 35 35 enum cache_policy default_policy, 36 36 enum cache_policy alternate_policy, 37 37 void __user *alternate_aperture_base, 38 - uint64_t alternate_aperture_size); 38 + uint64_t alternate_aperture_size, 39 + u32 misc_process_properties); 39 40 40 41 void device_queue_manager_init_v11( 41 42 struct device_queue_manager_asic_ops *asic_ops) ··· 61 60 enum cache_policy default_policy, 62 61 enum cache_policy alternate_policy, 63 62 void __user *alternate_aperture_base, 64 - uint64_t alternate_aperture_size) 63 + uint64_t alternate_aperture_size, 64 + u32 misc_process_properties) 65 65 { 66 66 qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << 67 67 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
··· 35 35 enum cache_policy default_policy, 36 36 enum cache_policy alternate_policy, 37 37 void __user *alternate_aperture_base, 38 - uint64_t alternate_aperture_size); 38 + uint64_t alternate_aperture_size, 39 + u32 misc_process_properties); 39 40 40 41 void device_queue_manager_init_v12( 41 42 struct device_queue_manager_asic_ops *asic_ops) ··· 61 60 enum cache_policy default_policy, 62 61 enum cache_policy alternate_policy, 63 62 void __user *alternate_aperture_base, 64 - uint64_t alternate_aperture_size) 63 + uint64_t alternate_aperture_size, 64 + u32 misc_process_properties) 65 65 { 66 66 qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED << 67 67 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+9 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
··· 35 35 enum cache_policy default_policy, 36 36 enum cache_policy alternate_policy, 37 37 void __user *alternate_aperture_base, 38 - uint64_t alternate_aperture_size); 38 + uint64_t alternate_aperture_size, 39 + u32 misc_process_properties); 39 40 40 41 void device_queue_manager_init_v9( 41 42 struct device_queue_manager_asic_ops *asic_ops) ··· 61 60 enum cache_policy default_policy, 62 61 enum cache_policy alternate_policy, 63 62 void __user *alternate_aperture_base, 64 - uint64_t alternate_aperture_size) 63 + uint64_t alternate_aperture_size, 64 + u32 misc_process_properties) 65 65 { 66 66 qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << 67 67 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; ··· 74 72 KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || 75 73 KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) 76 74 qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); 75 + 76 + if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) { 77 + if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION) 78 + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT; 79 + } 77 80 78 81 qpd->sh_mem_ape1_limit = 0; 79 82 qpd->sh_mem_ape1_base = 0;
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
··· 40 40 enum cache_policy default_policy, 41 41 enum cache_policy alternate_policy, 42 42 void __user *alternate_aperture_base, 43 - uint64_t alternate_aperture_size); 43 + uint64_t alternate_aperture_size, 44 + u32 misc_process_properties); 44 45 static int update_qpd_vi(struct device_queue_manager *dqm, 45 46 struct qcm_process_device *qpd); 46 47 static void init_sdma_vm(struct device_queue_manager *dqm, ··· 90 89 enum cache_policy default_policy, 91 90 enum cache_policy alternate_policy, 92 91 void __user *alternate_aperture_base, 93 - uint64_t alternate_aperture_size) 92 + uint64_t alternate_aperture_size, 93 + u32 misc_process_properties) 94 94 { 95 95 uint32_t default_mtype; 96 96 uint32_t ape1_mtype;
+2
drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h
··· 2261 2261 #define SH_MEM_CONFIG__ADDRESS_MODE__SHIFT 0x0 2262 2262 #define SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT 0x3 2263 2263 #define SH_MEM_CONFIG__F8_MODE__SHIFT 0x8 2264 + #define SH_MEM_CONFIG__PRECISION_MODE__SHIFT 0x9 2264 2265 #define SH_MEM_CONFIG__RETRY_DISABLE__SHIFT 0xc 2265 2266 #define SH_MEM_CONFIG__PRIVATE_NV__SHIFT 0xd 2266 2267 #define SH_MEM_CONFIG__ADDRESS_MODE_MASK 0x00000001L 2267 2268 #define SH_MEM_CONFIG__ALIGNMENT_MODE_MASK 0x00000018L 2268 2269 #define SH_MEM_CONFIG__F8_MODE_MASK 0x00000100L 2270 + #define SH_MEM_CONFIG__PRECISION_MODE_MASK 0x00000200L 2269 2271 #define SH_MEM_CONFIG__RETRY_DISABLE_MASK 0x00001000L 2270 2272 #define SH_MEM_CONFIG__PRIVATE_NV_MASK 0x00002000L 2271 2273 //SP_MFMA_PORTD_RD_CONFIG
+6 -2
include/uapi/linux/kfd_ioctl.h
··· 43 43 * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl 44 44 * - 1.16 - Add contiguous VRAM allocation flag 45 45 * - 1.17 - Add SDMA queue creation with target SDMA engine ID 46 + * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag 46 47 */ 47 48 #define KFD_IOCTL_MAJOR_VERSION 1 48 - #define KFD_IOCTL_MINOR_VERSION 17 49 + #define KFD_IOCTL_MINOR_VERSION 18 49 50 50 51 struct kfd_ioctl_get_version_args { 51 52 __u32 major_version; /* from KFD */ ··· 151 150 #define KFD_IOC_CACHE_POLICY_COHERENT 0 152 151 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 153 152 153 + /* Misc. per process flags */ 154 + #define KFD_PROC_FLAG_MFMA_HIGH_PRECISION (1 << 0) 155 + 154 156 struct kfd_ioctl_set_memory_policy_args { 155 157 __u64 alternate_aperture_base; /* to KFD */ 156 158 __u64 alternate_aperture_size; /* to KFD */ ··· 161 157 __u32 gpu_id; /* to KFD */ 162 158 __u32 default_policy; /* to KFD */ 163 159 __u32 alternate_policy; /* to KFD */ 164 - __u32 pad; 160 + __u32 misc_process_flag; /* to KFD */ 165 161 }; 166 162 167 163 /*