Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'drm-intel-gt-next-2022-07-13' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Driver uAPI changes:
- All related to Small BAR support (all by Matt Auld):
* add probed_cpu_visible_size
* expose the avail memory region tracking
* apply ALLOC_GPU_ONLY by default
* add NEEDS_CPU_ACCESS hint
* tweak error capture on recoverable contexts

Driver highlights:
- Add Small BAR support (Matt)
- Add MeteorLake support (RK)
- Add support for LMEM PCIe resizable BAR (Akeem)

Driver important fixes:
- ttm related fixes (Matt Auld)
- Fix a performance regression related to waitboost (Chris)
- Fix GT resets (Chris)

Driver others:
- Adding GuC SLPC selftest (Vinay)
- Fix ADL-N GuC load (Daniele)
- Add platform workaround (Gustavo, Matt Roper)
- DG2 and ATS-M device ID updates (Matt Roper)
- Add VM_BIND doc rfc with uAPI documentation (Niranjana)
- Fix use-after-free in vma destruction (Thomas)
- Async flush of GuC log regions (Alan)
- Fixes in selftests (Chris, Dan, Andrzej)
- Convert to drm_dbg (Umesh)
- Disable OA sseu config param for newer hardware (Umesh)
- Multi-cast register steering changes (Matt Roper)
- Add lmem_bar_size modparam (Priyanka)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Ys85pcMYLkqF/HtB@intel.com

+2301 -542
+189
Documentation/gpu/rfc/i915_small_bar.h
···
/**
 * struct __drm_i915_memory_region_info - Describes one region as known to the
 * driver.
 *
 * Note this is using both struct drm_i915_query_item and struct drm_i915_query.
 * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
 * at &drm_i915_query_item.query_id.
 */
struct __drm_i915_memory_region_info {
	/** @region: The class:instance pair encoding */
	struct drm_i915_gem_memory_class_instance region;

	/** @rsvd0: MBZ */
	__u32 rsvd0;

	/**
	 * @probed_size: Memory probed by the driver
	 *
	 * Note that it should not be possible to ever encounter a zero value
	 * here, also note that no current region type will ever return -1 here.
	 * Although for future region types, this might be a possibility. The
	 * same applies to the other size fields.
	 */
	__u64 probed_size;

	/**
	 * @unallocated_size: Estimate of memory remaining
	 *
	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.
	 * Without this (or if this is an older kernel) the value here will
	 * always equal the @probed_size. Note this is only currently tracked
	 * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here
	 * will always equal the @probed_size).
	 */
	__u64 unallocated_size;

	union {
		/** @rsvd1: MBZ */
		__u64 rsvd1[8];
		struct {
			/**
			 * @probed_cpu_visible_size: Memory probed by the driver
			 * that is CPU accessible.
			 *
			 * This will always be <= @probed_size, and the
			 * remainder (if there is any) will not be CPU
			 * accessible.
			 *
			 * On systems without small BAR, the @probed_size will
			 * always equal the @probed_cpu_visible_size, since all
			 * of it will be CPU accessible.
			 *
			 * Note this is only tracked for
			 * I915_MEMORY_CLASS_DEVICE regions (for other types the
			 * value here will always equal the @probed_size).
			 *
			 * Note that if the value returned here is zero, then
			 * this must be an old kernel which lacks the relevant
			 * small-bar uAPI support (including
			 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on
			 * such systems we should never actually end up with a
			 * small BAR configuration, assuming we are able to load
			 * the kernel module. Hence it should be safe to treat
			 * this the same as when @probed_cpu_visible_size ==
			 * @probed_size.
			 */
			__u64 probed_cpu_visible_size;

			/**
			 * @unallocated_cpu_visible_size: Estimate of CPU
			 * visible memory remaining
			 *
			 * Note this is only tracked for
			 * I915_MEMORY_CLASS_DEVICE regions (for other types the
			 * value here will always equal the
			 * @probed_cpu_visible_size).
			 *
			 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
			 * accounting. Without this the value here will always
			 * equal the @probed_cpu_visible_size.
			 *
			 * If this is an older kernel the value here will be
			 * zero, see also @probed_cpu_visible_size.
			 */
			__u64 unallocated_cpu_visible_size;
		};
	};
};

/**
 * struct __drm_i915_gem_create_ext - Existing gem_create behaviour, with added
 * extension support using struct i915_user_extension.
 *
 * Note that new buffer flags should be added here, at least for the stuff that
 * is immutable. Previously we would have two ioctls, one to create the object
 * with gem_create, and another to apply various parameters, however this
 * creates some ambiguity for the params which are considered immutable. Also in
 * general we're phasing out the various SET/GET ioctls.
 */
struct __drm_i915_gem_create_ext {
	/**
	 * @size: Requested size for the object.
	 *
	 * The (page-aligned) allocated size for the object will be returned.
	 *
	 * Note that for some devices we might have further minimum
	 * page-size restrictions (larger than 4K), like for device local-memory.
	 * However in general the final size here should always reflect any
	 * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
	 * extension to place the object in device local-memory. The kernel will
	 * always select the largest minimum page-size for the set of possible
	 * placements as the value to use when rounding up the @size.
	 */
	__u64 size;

	/**
	 * @handle: Returned handle for the object.
	 *
	 * Object handles are nonzero.
	 */
	__u32 handle;

	/**
	 * @flags: Optional flags.
	 *
	 * Supported values:
	 *
	 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
	 * the object will need to be accessed via the CPU.
	 *
	 * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
	 * strictly required on configurations where some subset of the device
	 * memory is directly visible/mappable through the CPU (which we also
	 * call small BAR), like on some DG2+ systems. Note that this is quite
	 * undesirable, but due to various factors like the client CPU, BIOS etc.
	 * it's something we can expect to see in the wild. See
	 * &__drm_i915_memory_region_info.probed_cpu_visible_size for how to
	 * determine if this system applies.
	 *
	 * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to
	 * ensure the kernel can always spill the allocation to system memory,
	 * if the object can't be allocated in the mappable part of
	 * I915_MEMORY_CLASS_DEVICE.
	 *
	 * Also note that since the kernel only supports flat-CCS on objects
	 * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore
	 * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
	 * flat-CCS.
	 *
	 * Without this hint, the kernel will assume that non-mappable
	 * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
	 * kernel can still migrate the object to the mappable part, as a last
	 * resort, if userspace ever CPU faults this object, but this might be
	 * expensive, and so ideally should be avoided.
	 *
	 * On older kernels which lack the relevant small-bar uAPI support (see
	 * also &__drm_i915_memory_region_info.probed_cpu_visible_size),
	 * usage of the flag will result in an error, but it should NEVER be
	 * possible to end up with a small BAR configuration, assuming we can
	 * also successfully load the i915 kernel module. In such cases the
	 * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as
	 * such there are zero restrictions on where the object can be placed.
	 */
#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
	__u32 flags;

	/**
	 * @extensions: The chain of extensions to apply to this object.
	 *
	 * This will be useful in the future when we need to support several
	 * different extensions, and we need to apply more than one when
	 * creating the object. See struct i915_user_extension.
	 *
	 * If we don't supply any extensions then we get the same old gem_create
	 * behaviour.
	 *
	 * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
	 * struct drm_i915_gem_create_ext_memory_regions.
	 *
	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
	 * struct drm_i915_gem_create_ext_protected_content.
	 */
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
	__u64 extensions;
};
+47
Documentation/gpu/rfc/i915_small_bar.rst
···
==========================
I915 Small BAR RFC Section
==========================
Starting from DG2 we will have resizable BAR support for device local-memory
(i.e. I915_MEMORY_CLASS_DEVICE), but in some cases the final BAR size might
still be smaller than the total probed_size. In such cases, only some subset of
I915_MEMORY_CLASS_DEVICE will be CPU accessible (for example the first 256M),
while the remainder is only accessible via the GPU.

I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag
----------------------------------------------
New gem_create_ext flag to tell the kernel that a BO will require CPU access.
This becomes important when placing an object in I915_MEMORY_CLASS_DEVICE, where
underneath the device has a small BAR, meaning only some portion of it is CPU
accessible. Without this flag the kernel will assume that CPU access is not
required, and prioritize using the non-CPU visible portion of
I915_MEMORY_CLASS_DEVICE.

.. kernel-doc:: Documentation/gpu/rfc/i915_small_bar.h
   :functions: __drm_i915_gem_create_ext

probed_cpu_visible_size attribute
---------------------------------
New struct __drm_i915_memory_region_info attribute which returns the total size
of the CPU accessible portion, for the particular region. This should only be
applicable for I915_MEMORY_CLASS_DEVICE. We also report the
unallocated_cpu_visible_size, alongside the unallocated_size.

Vulkan will need this as part of creating a separate VkMemoryHeap with the
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT set, to represent the CPU visible portion,
where the total size of the heap needs to be known. It also wants to be able to
give a rough estimate of how much memory can potentially be allocated.

.. kernel-doc:: Documentation/gpu/rfc/i915_small_bar.h
   :functions: __drm_i915_memory_region_info

Error Capture restrictions
--------------------------
With error capture we have two new restrictions:

1) Error capture is best effort on small BAR systems; if the pages are not
   CPU accessible, at the time of capture, then the kernel is free to skip
   trying to capture them.

2) On discrete and newer integrated platforms we now reject error capture
   on recoverable contexts. In the future the kernel may want to blit during
   error capture, when for example something is not currently CPU accessible.
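The Vulkan heap split described above is simple arithmetic over the region attributes. This is an illustrative sketch only, not Mesa code; the struct and function names are invented for the example.

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical split of one I915_MEMORY_CLASS_DEVICE region into two heap
 * sizes: a HOST_VISIBLE heap and a device-local-only remainder. */
struct heap_split {
	uint64_t host_visible; /* VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT heap */
	uint64_t device_only;  /* remainder, GPU access only */
};

static struct heap_split split_heaps(uint64_t probed_size,
				     uint64_t probed_cpu_visible_size)
{
	struct heap_split s;

	/* probed_cpu_visible_size <= probed_size per the uAPI doc */
	s.host_visible = probed_cpu_visible_size;
	s.device_only = probed_size - probed_cpu_visible_size;
	return s;
}
```

The unallocated_cpu_visible_size attribute would feed the "rough estimate of how much memory can potentially be allocated" mentioned above.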
+291
Documentation/gpu/rfc/i915_vm_bind.h
···
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

/**
 * DOC: I915_PARAM_VM_BIND_VERSION
 *
 * VM_BIND feature version supported.
 * See typedef drm_i915_getparam_t param.
 *
 * Specifies the VM_BIND feature version supported.
 * The following versions of VM_BIND have been defined:
 *
 * 0: No VM_BIND support.
 *
 * 1: In VM_UNBIND calls, the UMD must specify the exact mappings created
 *    previously with VM_BIND, the ioctl will not support unbinding multiple
 *    mappings or splitting them. Similarly, VM_BIND calls will not replace
 *    any existing mappings.
 *
 * 2: The restrictions on unbinding partial or multiple mappings are
 *    lifted. Similarly, binding will replace any mappings in the given range.
 *
 * See struct drm_i915_gem_vm_bind and struct drm_i915_gem_vm_unbind.
 */
#define I915_PARAM_VM_BIND_VERSION	57

/**
 * DOC: I915_VM_CREATE_FLAGS_USE_VM_BIND
 *
 * Flag to opt-in for VM_BIND mode of binding during VM creation.
 * See struct drm_i915_gem_vm_control flags.
 *
 * The older execbuf2 ioctl will not support VM_BIND mode of operation.
 * For VM_BIND mode, we have a new execbuf3 ioctl which will not accept any
 * execlist (See struct drm_i915_gem_execbuffer3 for more details).
 */
#define I915_VM_CREATE_FLAGS_USE_VM_BIND	(1 << 0)

/* VM_BIND related ioctls */
#define DRM_I915_GEM_VM_BIND		0x3d
#define DRM_I915_GEM_VM_UNBIND		0x3e
#define DRM_I915_GEM_EXECBUFFER3	0x3f

#define DRM_IOCTL_I915_GEM_VM_BIND	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
#define DRM_IOCTL_I915_GEM_VM_UNBIND	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_bind)
#define DRM_IOCTL_I915_GEM_EXECBUFFER3	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER3, struct drm_i915_gem_execbuffer3)

/**
 * struct drm_i915_gem_timeline_fence - An input or output timeline fence.
 *
 * The operation will wait for the input fence to signal.
 *
 * The returned output fence will be signaled after the completion of the
 * operation.
 */
struct drm_i915_gem_timeline_fence {
	/** @handle: User's handle for a drm_syncobj to wait on or signal. */
	__u32 handle;

	/**
	 * @flags: Supported flags are:
	 *
	 * I915_TIMELINE_FENCE_WAIT:
	 * Wait for the input fence before the operation.
	 *
	 * I915_TIMELINE_FENCE_SIGNAL:
	 * Return operation completion fence as output.
	 */
	__u32 flags;
#define I915_TIMELINE_FENCE_WAIT            (1 << 0)
#define I915_TIMELINE_FENCE_SIGNAL          (1 << 1)
#define __I915_TIMELINE_FENCE_UNKNOWN_FLAGS (-(I915_TIMELINE_FENCE_SIGNAL << 1))

	/**
	 * @value: A point in the timeline.
	 * Value must be 0 for a binary drm_syncobj. A value of 0 for a
	 * timeline drm_syncobj is invalid as it turns a drm_syncobj into a
	 * binary one.
	 */
	__u64 value;
};

/**
 * struct drm_i915_gem_vm_bind - VA to object mapping to bind.
 *
 * This structure is passed to the VM_BIND ioctl and specifies the mapping of a
 * GPU virtual address (VA) range to the section of an object that should be
 * bound in the device page table of the specified address space (VM).
 * The VA range specified must be unique (i.e., not currently bound) and can
 * be mapped to the whole object or a section of the object (partial binding).
 * Multiple VA mappings can be created to the same section of the object
 * (aliasing).
 *
 * The @start, @offset and @length must be 4K page aligned. However DG2
 * and XEHPSDV have a 64K page size for device local memory and a compact page
 * table. On those platforms, for binding device local-memory objects, the
 * @start, @offset and @length must be 64K aligned. Also, UMDs should not mix
 * the local memory 64K page and the system memory 4K page bindings in the same
 * 2M range.
 *
 * Error code -EINVAL will be returned if @start, @offset and @length are not
 * properly aligned. In version 1 (See I915_PARAM_VM_BIND_VERSION), error code
 * -ENOSPC will be returned if the VA range specified can't be reserved.
 *
 * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
 * are not ordered. Furthermore, parts of the VM_BIND operation can be done
 * asynchronously, if a valid @fence is specified.
 */
struct drm_i915_gem_vm_bind {
	/** @vm_id: VM (address space) id to bind */
	__u32 vm_id;

	/** @handle: Object handle */
	__u32 handle;

	/** @start: Virtual Address start to bind */
	__u64 start;

	/** @offset: Offset in object to bind */
	__u64 offset;

	/** @length: Length of mapping to bind */
	__u64 length;

	/**
	 * @flags: Supported flags are:
	 *
	 * I915_GEM_VM_BIND_CAPTURE:
	 * Capture this mapping in the dump upon GPU error.
	 *
	 * Note that @fence carries its own flags.
	 */
	__u64 flags;
#define I915_GEM_VM_BIND_CAPTURE (1 << 0)

	/**
	 * @fence: Timeline fence for bind completion signaling.
	 *
	 * Timeline fence is of format struct drm_i915_gem_timeline_fence.
	 *
	 * It is an out fence, hence using the I915_TIMELINE_FENCE_WAIT flag
	 * is invalid, and an error will be returned.
	 *
	 * If the I915_TIMELINE_FENCE_SIGNAL flag is not set, then an out fence
	 * is not requested and binding is completed synchronously.
	 */
	struct drm_i915_gem_timeline_fence fence;

	/**
	 * @extensions: Zero-terminated chain of extensions.
	 *
	 * For future extensions. See struct i915_user_extension.
	 */
	__u64 extensions;
};

/**
 * struct drm_i915_gem_vm_unbind - VA to object mapping to unbind.
 *
 * This structure is passed to the VM_UNBIND ioctl and specifies the GPU virtual
 * address (VA) range that should be unbound from the device page table of the
 * specified address space (VM). VM_UNBIND will force unbind the specified
 * range from the device page table without waiting for any GPU job to complete.
 * It is the UMD's responsibility to ensure the mapping is no longer in use
 * before calling VM_UNBIND.
 *
 * If the specified mapping is not found, the ioctl will simply return without
 * any error.
 *
 * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
 * are not ordered. Furthermore, parts of the VM_UNBIND operation can be done
 * asynchronously, if a valid @fence is specified.
 */
struct drm_i915_gem_vm_unbind {
	/** @vm_id: VM (address space) id to bind */
	__u32 vm_id;

	/** @rsvd: Reserved, MBZ */
	__u32 rsvd;

	/** @start: Virtual Address start to unbind */
	__u64 start;

	/** @length: Length of mapping to unbind */
	__u64 length;

	/**
	 * @flags: Currently reserved, MBZ.
	 *
	 * Note that @fence carries its own flags.
	 */
	__u64 flags;

	/**
	 * @fence: Timeline fence for unbind completion signaling.
	 *
	 * Timeline fence is of format struct drm_i915_gem_timeline_fence.
	 *
	 * It is an out fence, hence using the I915_TIMELINE_FENCE_WAIT flag
	 * is invalid, and an error will be returned.
	 *
	 * If the I915_TIMELINE_FENCE_SIGNAL flag is not set, then an out fence
	 * is not requested and unbinding is completed synchronously.
	 */
	struct drm_i915_gem_timeline_fence fence;

	/**
	 * @extensions: Zero-terminated chain of extensions.
	 *
	 * For future extensions. See struct i915_user_extension.
	 */
	__u64 extensions;
};

/**
 * struct drm_i915_gem_execbuffer3 - Structure for DRM_I915_GEM_EXECBUFFER3
 * ioctl.
 *
 * The DRM_I915_GEM_EXECBUFFER3 ioctl only works in VM_BIND mode, and VM_BIND
 * mode only works with this ioctl for submission.
 * See I915_VM_CREATE_FLAGS_USE_VM_BIND.
 */
struct drm_i915_gem_execbuffer3 {
	/**
	 * @ctx_id: Context id
	 *
	 * Only contexts with a user engine map are allowed.
	 */
	__u32 ctx_id;

	/**
	 * @engine_idx: Engine index
	 *
	 * An index in the user engine map of the context specified by @ctx_id.
	 */
	__u32 engine_idx;

	/**
	 * @batch_address: Batch gpu virtual address(es).
	 *
	 * For normal submission, it is the gpu virtual address of the batch
	 * buffer. For parallel submission, it is a pointer to an array of
	 * batch buffer gpu virtual addresses with array size equal to the
	 * number of (parallel) engines involved in that submission (See
	 * struct i915_context_engines_parallel_submit).
	 */
	__u64 batch_address;

	/** @flags: Currently reserved, MBZ */
	__u64 flags;

	/** @rsvd1: Reserved, MBZ */
	__u32 rsvd1;

	/** @fence_count: Number of fences in the @timeline_fences array. */
	__u32 fence_count;

	/**
	 * @timeline_fences: Pointer to an array of timeline fences.
	 *
	 * Timeline fences are of format struct drm_i915_gem_timeline_fence.
	 */
	__u64 timeline_fences;

	/** @rsvd2: Reserved, MBZ */
	__u64 rsvd2;

	/**
	 * @extensions: Zero-terminated chain of extensions.
	 *
	 * For future extensions. See struct i915_user_extension.
	 */
	__u64 extensions;
};

/**
 * struct drm_i915_gem_create_ext_vm_private - Extension to make the object
 * private to the specified VM.
 *
 * See struct drm_i915_gem_create_ext.
 */
struct drm_i915_gem_create_ext_vm_private {
#define I915_GEM_CREATE_EXT_VM_PRIVATE 2
	/** @base: Extension link. See struct i915_user_extension. */
	struct i915_user_extension base;

	/** @vm_id: Id of the VM to which the object is private */
	__u32 vm_id;
};
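The alignment rules in the drm_i915_gem_vm_bind kernel-doc above (4K in general, 64K for device local-memory on DG2/XEHPSDV) can be pre-checked in userspace before issuing the ioctl. A hedged sketch follows; the helper is hypothetical and not part of any real UMD.

```c
#include <assert.h>
#include <stdint.h>

/*
 * Hypothetical userspace-side pre-validation of VM_BIND arguments: @start,
 * @offset and @length must all be multiples of the applicable page size
 * (4096, or 65536 for local-memory objects on DG2/XEHPSDV). The kernel
 * itself returns -EINVAL on misalignment; this just catches it early.
 * Assumes page_size is a power of two.
 */
static int vm_bind_range_aligned(uint64_t start, uint64_t offset,
				 uint64_t length, uint64_t page_size)
{
	uint64_t mask = page_size - 1;

	return !((start | offset | length) & mask);
}
```

A UMD would pick the 64K page size whenever the object can be placed in device local-memory on those platforms, per the note about not mixing 4K and 64K bindings within the same 2M range.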
+245
Documentation/gpu/rfc/i915_vm_bind.rst
··· 1 + ========================================== 2 + I915 VM_BIND feature design and use cases 3 + ========================================== 4 + 5 + VM_BIND feature 6 + ================ 7 + DRM_I915_GEM_VM_BIND/UNBIND ioctls allows UMD to bind/unbind GEM buffer 8 + objects (BOs) or sections of a BOs at specified GPU virtual addresses on a 9 + specified address space (VM). These mappings (also referred to as persistent 10 + mappings) will be persistent across multiple GPU submissions (execbuf calls) 11 + issued by the UMD, without user having to provide a list of all required 12 + mappings during each submission (as required by older execbuf mode). 13 + 14 + The VM_BIND/UNBIND calls allow UMDs to request a timeline out fence for 15 + signaling the completion of bind/unbind operation. 16 + 17 + VM_BIND feature is advertised to user via I915_PARAM_VM_BIND_VERSION. 18 + User has to opt-in for VM_BIND mode of binding for an address space (VM) 19 + during VM creation time via I915_VM_CREATE_FLAGS_USE_VM_BIND extension. 20 + 21 + VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently are 22 + not ordered. Furthermore, parts of the VM_BIND/UNBIND operations can be done 23 + asynchronously, when valid out fence is specified. 24 + 25 + VM_BIND features include: 26 + 27 + * Multiple Virtual Address (VA) mappings can map to the same physical pages 28 + of an object (aliasing). 29 + * VA mapping can map to a partial section of the BO (partial binding). 30 + * Support capture of persistent mappings in the dump upon GPU error. 31 + * Support for userptr gem objects (no special uapi is required for this). 32 + 33 + TLB flush consideration 34 + ------------------------ 35 + The i915 driver flushes the TLB for each submission and when an object's 36 + pages are released. The VM_BIND/UNBIND operation will not do any additional 37 + TLB flush. 
Any VM_BIND mapping added will be in the working set for subsequent 38 + submissions on that VM and will not be in the working set for currently running 39 + batches (which would require additional TLB flushes, which is not supported). 40 + 41 + Execbuf ioctl in VM_BIND mode 42 + ------------------------------- 43 + A VM in VM_BIND mode will not support older execbuf mode of binding. 44 + The execbuf ioctl handling in VM_BIND mode differs significantly from the 45 + older execbuf2 ioctl (See struct drm_i915_gem_execbuffer2). 46 + Hence, a new execbuf3 ioctl has been added to support VM_BIND mode. (See 47 + struct drm_i915_gem_execbuffer3). The execbuf3 ioctl will not accept any 48 + execlist. Hence, no support for implicit sync. It is expected that the below 49 + work will be able to support requirements of object dependency setting in all 50 + use cases: 51 + 52 + "dma-buf: Add an API for exporting sync files" 53 + (https://lwn.net/Articles/859290/) 54 + 55 + The new execbuf3 ioctl only works in VM_BIND mode and the VM_BIND mode only 56 + works with execbuf3 ioctl for submission. All BOs mapped on that VM (through 57 + VM_BIND call) at the time of execbuf3 call are deemed required for that 58 + submission. 59 + 60 + The execbuf3 ioctl directly specifies the batch addresses instead of as 61 + object handles as in execbuf2 ioctl. The execbuf3 ioctl will also not 62 + support many of the older features like in/out/submit fences, fence array, 63 + default gem context and many more (See struct drm_i915_gem_execbuffer3). 64 + 65 + In VM_BIND mode, VA allocation is completely managed by the user instead of 66 + the i915 driver. Hence all VA assignment, eviction are not applicable in 67 + VM_BIND mode. Also, for determining object activeness, VM_BIND mode will not 68 + be using the i915_vma active reference tracking. It will instead use dma-resv 69 + object for that (See `VM_BIND dma_resv usage`_). 
70 + 71 + So, a lot of existing code supporting execbuf2 ioctl, like relocations, VA 72 + evictions, vma lookup table, implicit sync, vma active reference tracking etc., 73 + are not applicable for execbuf3 ioctl. Hence, all execbuf3 specific handling 74 + should be in a separate file and only functionalities common to these ioctls 75 + can be the shared code where possible. 76 + 77 + VM_PRIVATE objects 78 + ------------------- 79 + By default, BOs can be mapped on multiple VMs and can also be dma-buf 80 + exported. Hence these BOs are referred to as Shared BOs. 81 + During each execbuf submission, the request fence must be added to the 82 + dma-resv fence list of all shared BOs mapped on the VM. 83 + 84 + VM_BIND feature introduces an optimization where user can create BO which 85 + is private to a specified VM via I915_GEM_CREATE_EXT_VM_PRIVATE flag during 86 + BO creation. Unlike Shared BOs, these VM private BOs can only be mapped on 87 + the VM they are private to and can't be dma-buf exported. 88 + All private BOs of a VM share the dma-resv object. Hence during each execbuf 89 + submission, they need only one dma-resv fence list updated. Thus, the fast 90 + path (where required mappings are already bound) submission latency is O(1) 91 + w.r.t the number of VM private BOs. 92 + 93 + VM_BIND locking hirarchy 94 + ------------------------- 95 + The locking design here supports the older (execlist based) execbuf mode, the 96 + newer VM_BIND mode, the VM_BIND mode with GPU page faults and possible future 97 + system allocator support (See `Shared Virtual Memory (SVM) support`_). 98 + The older execbuf mode and the newer VM_BIND mode without page faults manages 99 + residency of backing storage using dma_fence. The VM_BIND mode with page faults 100 + and the system allocator support do not use any dma_fence at all. 101 + 102 + VM_BIND locking order is as below. 103 + 104 + 1) Lock-A: A vm_bind mutex will protect vm_bind lists. 
This lock is taken in 105 + vm_bind/vm_unbind ioctl calls, in the execbuf path and while releasing the 106 + mapping. 107 + 108 + In future, when GPU page faults are supported, we can potentially use a 109 + rwsem instead, so that multiple page fault handlers can take the read side 110 + lock to lookup the mapping and hence can run in parallel. 111 + The older execbuf mode of binding do not need this lock. 112 + 113 + 2) Lock-B: The object's dma-resv lock will protect i915_vma state and needs to 114 + be held while binding/unbinding a vma in the async worker and while updating 115 + dma-resv fence list of an object. Note that private BOs of a VM will all 116 + share a dma-resv object. 117 + 118 + The future system allocator support will use the HMM prescribed locking 119 + instead. 120 + 121 + 3) Lock-C: Spinlock/s to protect some of the VM's lists like the list of 122 + invalidated vmas (due to eviction and userptr invalidation) etc. 123 + 124 + When GPU page faults are supported, the execbuf path do not take any of these 125 + locks. There we will simply smash the new batch buffer address into the ring and 126 + then tell the scheduler run that. The lock taking only happens from the page 127 + fault handler, where we take lock-A in read mode, whichever lock-B we need to 128 + find the backing storage (dma_resv lock for gem objects, and hmm/core mm for 129 + system allocator) and some additional locks (lock-D) for taking care of page 130 + table races. Page fault mode should not need to ever manipulate the vm lists, 131 + so won't ever need lock-C. 132 + 133 + VM_BIND LRU handling 134 + --------------------- 135 + We need to ensure VM_BIND mapped objects are properly LRU tagged to avoid 136 + performance degradation. We will also need support for bulk LRU movement of 137 + VM_BIND objects to avoid additional latencies in execbuf path. 
138 + 139 + The page table pages are similar to VM_BIND mapped objects (See 140 + `Evictable page table allocations`_) and are maintained per VM and needs to 141 + be pinned in memory when VM is made active (ie., upon an execbuf call with 142 + that VM). So, bulk LRU movement of page table pages is also needed. 143 + 144 + VM_BIND dma_resv usage 145 + ----------------------- 146 + Fences needs to be added to all VM_BIND mapped objects. During each execbuf 147 + submission, they are added with DMA_RESV_USAGE_BOOKKEEP usage to prevent 148 + over sync (See enum dma_resv_usage). One can override it with either 149 + DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE usage during explicit object 150 + dependency setting. 151 + 152 + Note that DRM_I915_GEM_WAIT and DRM_I915_GEM_BUSY ioctls do not check for 153 + DMA_RESV_USAGE_BOOKKEEP usage and hence should not be used for end of batch 154 + check. Instead, the execbuf3 out fence should be used for end of batch check 155 + (See struct drm_i915_gem_execbuffer3). 156 + 157 + Also, in VM_BIND mode, use dma-resv apis for determining object activeness 158 + (See dma_resv_test_signaled() and dma_resv_wait_timeout()) and do not use the 159 + older i915_vma active reference tracking which is deprecated. This should be 160 + easier to get it working with the current TTM backend. 161 + 162 + Mesa use case 163 + -------------- 164 + VM_BIND can potentially reduce the CPU overhead in Mesa (both Vulkan and Iris), 165 + hence improving performance of CPU-bound applications. It also allows us to 166 + implement Vulkan's Sparse Resources. With increasing GPU hardware performance, 167 + reducing CPU overhead becomes more impactful. 168 + 169 + 170 + Other VM_BIND use cases 171 + ======================== 172 + 173 + Long running Compute contexts 174 + ------------------------------ 175 + Usage of dma-fence expects that they complete in reasonable amount of time. 176 + Compute on the other hand can be long running. 
Hence it is appropriate for 177 + compute to use user/memory fence (See `User/Memory Fence`_) and dma-fence usage 178 + must be limited to in-kernel consumption only. 179 + 180 + Where GPU page faults are not available, kernel driver upon buffer invalidation 181 + will initiate a suspend (preemption) of long running context, finish the 182 + invalidation, revalidate the BO and then resume the compute context. This is 183 + done by having a per-context preempt fence which is enabled when someone tries 184 + to wait on it and triggers the context preemption. 185 + 186 + User/Memory Fence 187 + ~~~~~~~~~~~~~~~~~~ 188 + User/Memory fence is a <address, value> pair. To signal the user fence, the 189 + specified value will be written at the specified virtual address and wakeup the 190 + waiting process. User fence can be signaled either by the GPU or kernel async 191 + worker (like upon bind completion). User can wait on a user fence with a new 192 + user fence wait ioctl. 193 + 194 + Here is some prior work on this: 195 + https://patchwork.freedesktop.org/patch/349417/ 196 + 197 + Low Latency Submission 198 + ~~~~~~~~~~~~~~~~~~~~~~~ 199 + Allows compute UMD to directly submit GPU jobs instead of through execbuf 200 + ioctl. This is made possible by VM_BIND is not being synchronized against 201 + execbuf. VM_BIND allows bind/unbind of mappings required for the directly 202 + submitted jobs. 203 + 204 + Debugger 205 + --------- 206 + With debug event interface user space process (debugger) is able to keep track 207 + of and act upon resources created by another process (debugged) and attached 208 + to GPU via vm_bind interface. 209 + 210 + GPU page faults 211 + ---------------- 212 + GPU page faults when supported (in future), will only be supported in the 213 + VM_BIND mode. 
While both the older execbuf mode and the newer VM_BIND mode of 214 + binding will require using dma-fence to ensure residency, the GPU page faults 215 + mode, when supported, will not use any dma-fence, as residency is purely managed 216 + by installing and removing/invalidating page table entries. 217 + 218 + Page level hints settings 219 + -------------------------- 220 + VM_BIND allows hints to be set per mapping instead of per BO. Possible hints 221 + include placement and atomicity. A sub-BO level placement hint will be even more 222 + relevant with upcoming GPU on-demand page fault support. 223 + 224 + Page level Cache/CLOS settings 225 + ------------------------------- 226 + VM_BIND allows cache/CLOS settings per mapping instead of per BO. 227 + 228 + Evictable page table allocations 229 + --------------------------------- 230 + Make page table allocations evictable and manage them similarly to VM_BIND 231 + mapped objects. Page table pages are similar to persistent mappings of a 232 + VM (the differences here are that the page table pages will not have an i915_vma 233 + structure and, after swapping pages back in, the parent page link needs to be 234 + updated). 235 + 236 + Shared Virtual Memory (SVM) support 237 + ------------------------------------ 238 + The VM_BIND interface can be used to map system memory directly (without the gem 239 + BO abstraction) using the HMM interface. SVM is only supported with GPU page 240 + faults enabled. 241 + 242 + VM_BIND UAPI 243 + ============= 244 + 245 + .. kernel-doc:: Documentation/gpu/rfc/i915_vm_bind.h
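The user/memory fence described in the doc above is just an <address, value> pair. A rough userspace-style sketch of the signal/wait semantics follows; none of these names are part of the proposed uAPI, and the real interface would signal from the GPU or a kernel async worker and wait via a dedicated ioctl rather than spinning:

```c
#include <stdatomic.h>
#include <stdint.h>

/* Illustrative only: models the <address, value> pair semantics of a
 * user/memory fence entirely on the CPU. */
struct user_fence {
	_Atomic uint64_t *addr;	/* virtual address to be written */
	uint64_t value;		/* value that marks the fence signalled */
};

/* Signal: write the agreed value at the agreed address. */
static void user_fence_signal(struct user_fence *f)
{
	atomic_store_explicit(f->addr, f->value, memory_order_release);
}

/* Wait: poll until the value appears. The proposed uAPI would sleep in
 * the kernel via a wait ioctl instead of busy-waiting like this. */
static int user_fence_wait(struct user_fence *f)
{
	while (atomic_load_explicit(f->addr, memory_order_acquire) != f->value)
		;
	return 0;
}
```

The release/acquire pairing stands in for whatever ordering the real signalling path (GPU write or kernel worker) would provide.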
+8
Documentation/gpu/rfc/index.rst
··· 23 23 .. toctree:: 24 24 25 25 i915_scheduler.rst 26 + 27 + .. toctree:: 28 + 29 + i915_small_bar.rst 30 + 31 + .. toctree:: 32 + 33 + i915_vm_bind.rst
+19 -1
drivers/gpu/drm/i915/gem/i915_gem_create.c
··· 241 241 struct drm_i915_private *i915; 242 242 struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; 243 243 unsigned int n_placements; 244 + unsigned int placement_mask; 244 245 unsigned long flags; 245 246 }; 246 247 ··· 338 337 for (i = 0; i < args->num_regions; i++) 339 338 ext_data->placements[i] = placements[i]; 340 339 340 + ext_data->placement_mask = mask; 341 341 return 0; 342 342 343 343 out_dump: ··· 413 411 struct drm_i915_gem_object *obj; 414 412 int ret; 415 413 416 - if (args->flags) 414 + if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) 417 415 return -EINVAL; 418 416 419 417 ret = i915_user_extensions(u64_to_user_ptr(args->extensions), ··· 427 425 ext_data.placements[0] = 428 426 intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); 429 427 ext_data.n_placements = 1; 428 + } 429 + 430 + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) { 431 + if (ext_data.n_placements == 1) 432 + return -EINVAL; 433 + 434 + /* 435 + * We always need to be able to spill to system memory, if we 436 + * can't place in the mappable part of LMEM. 437 + */ 438 + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM))) 439 + return -EINVAL; 440 + } else { 441 + if (ext_data.n_placements > 1 || 442 + ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM) 443 + ext_data.flags |= I915_BO_ALLOC_GPU_ONLY; 430 444 } 431 445 432 446 obj = __i915_gem_object_create_user_ext(i915, args->size,
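The NEEDS_CPU_ACCESS handling in the hunk above reduces to a small decision table: objects that need CPU access must be able to spill to system memory, and otherwise non-system placements default to GPU-only allocation. A standalone sketch of the same checks, with the uAPI/driver constants mocked since this is not compiled against the real headers:

```c
#include <stdbool.h>
#include <errno.h>

/* Mocked stand-ins for the uAPI/driver constants used in the diff. */
#define NEEDS_CPU_ACCESS	(1u << 0)	/* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS */
#define REGION_SMEM_BIT		(1u << 0)	/* BIT(INTEL_REGION_SMEM) */
#define BO_ALLOC_GPU_ONLY	(1u << 1)	/* I915_BO_ALLOC_GPU_ONLY */

/* Mirrors the validation added to i915_gem_create_ext_ioctl(): reject
 * unknown flags, require a system-memory spill placement when CPU
 * access is requested, and otherwise mark lmem-only objects GPU-only. */
static int resolve_create_flags(unsigned int flags,
				unsigned int n_placements,
				unsigned int placement_mask,
				bool first_is_system,
				unsigned int *alloc_flags)
{
	*alloc_flags = 0;

	if (flags & ~NEEDS_CPU_ACCESS)
		return -EINVAL;

	if (flags & NEEDS_CPU_ACCESS) {
		if (n_placements == 1)
			return -EINVAL;
		/* Must always be able to spill to system memory. */
		if (!(placement_mask & REGION_SMEM_BIT))
			return -EINVAL;
	} else if (n_placements > 1 || !first_is_system) {
		*alloc_flags |= BO_ALLOC_GPU_ONLY;
	}
	return 0;
}
```

`first_is_system` stands in for the `placements[0]->type != INTEL_MEMORY_SYSTEM` test in the driver code.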
+12 -3
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 1951 1951 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 1952 1952 1953 1953 /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ 1954 - static void eb_capture_stage(struct i915_execbuffer *eb) 1954 + static int eb_capture_stage(struct i915_execbuffer *eb) 1955 1955 { 1956 1956 const unsigned int count = eb->buffer_count; 1957 1957 unsigned int i = count, j; ··· 1963 1963 1964 1964 if (!(flags & EXEC_OBJECT_CAPTURE)) 1965 1965 continue; 1966 + 1967 + if (i915_gem_context_is_recoverable(eb->gem_context) && 1968 + (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) 1969 + return -EINVAL; 1966 1970 1967 1971 for_each_batch_create_order(eb, j) { 1968 1972 struct i915_capture_list *capture; ··· 1980 1976 eb->capture_lists[j] = capture; 1981 1977 } 1982 1978 } 1979 + 1980 + return 0; 1983 1981 } 1984 1982 1985 1983 /* Commit once we're in the critical path */ ··· 2023 2017 2024 2018 #else 2025 2019 2026 - static void eb_capture_stage(struct i915_execbuffer *eb) 2020 + static int eb_capture_stage(struct i915_execbuffer *eb) 2027 2021 { 2022 + return 0; 2028 2023 } 2029 2024 2030 2025 static void eb_capture_commit(struct i915_execbuffer *eb) ··· 3417 3410 } 3418 3411 3419 3412 ww_acquire_done(&eb.ww.ctx); 3420 - eb_capture_stage(&eb); 3413 + err = eb_capture_stage(&eb); 3414 + if (err) 3415 + goto err_vma; 3421 3416 3422 3417 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); 3423 3418 if (IS_ERR(out_fence)) {
+47
drivers/gpu/drm/i915/gem/i915_gem_object.c
··· 717 717 return false; 718 718 } 719 719 720 + /** 721 + * i915_gem_object_needs_ccs_pages - Check whether the object requires extra 722 + * pages when placed in system-memory, in order to save and later restore the 723 + * flat-CCS aux state when the object is moved between local-memory and 724 + * system-memory 725 + * @obj: Pointer to the object 726 + * 727 + * Return: True if the object needs extra ccs pages. False otherwise. 728 + */ 729 + bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) 730 + { 731 + bool lmem_placement = false; 732 + int i; 733 + 734 + for (i = 0; i < obj->mm.n_placements; i++) { 735 + /* Compression is not allowed for the objects with smem placement */ 736 + if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) 737 + return false; 738 + if (!lmem_placement && 739 + obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL) 740 + lmem_placement = true; 741 + } 742 + 743 + return lmem_placement; 744 + } 745 + 720 746 void i915_gem_init__objects(struct drm_i915_private *i915) 721 747 { 722 748 INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work); ··· 809 783 intr, MAX_SCHEDULE_TIMEOUT); 810 784 if (!ret) 811 785 ret = -ETIME; 786 + else if (ret > 0 && i915_gem_object_has_unknown_state(obj)) 787 + ret = -EIO; 812 788 813 789 return ret < 0 ? ret : 0; 790 + } 791 + 792 + /** 793 + * i915_gem_object_has_unknown_state - Return true if the object backing pages are 794 + * in an unknown_state. This means that userspace must NEVER be allowed to touch 795 + * the pages, with either the GPU or CPU. 796 + * 797 + * ONLY valid to be called after ensuring that all kernel fences have signalled 798 + * (in particular the fence for moving/clearing the object). 799 + */ 800 + bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj) 801 + { 802 + /* 803 + * The below barrier pairs with the dma_fence_signal() in 804 + * __memcpy_work(). 
We should only sample the unknown_state after all 805 + * the kernel fences have signalled. 806 + */ 807 + smp_rmb(); 808 + return obj->mm.unknown_state; 814 809 } 815 810 816 811 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
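The smp_rmb()/dma_fence_signal() pairing in i915_gem_object_has_unknown_state() above follows the usual publish/consume pattern. A userspace analogue using C11 acquire/release semantics, illustrative only (the kernel code uses dma-fence signalling and smp_rmb(), not these atomics):

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Userspace analogue of the unknown_state publication pattern: the
 * writer sets the plain flag, then publishes completion with a release
 * store (standing in for dma_fence_signal()); the reader waits for
 * completion with an acquire load (standing in for waiting on the
 * fence plus the smp_rmb()) before sampling the flag. */
static bool unknown_state;		/* plain data, published below */
static atomic_bool fence_signalled;	/* stands in for the dma_fence */

static void writer_fail_path(void)
{
	unknown_state = true;			/* mark the object borked */
	atomic_store_explicit(&fence_signalled, true,
			      memory_order_release);	/* "signal fence" */
}

static bool reader_sample(void)
{
	/* Only valid to sample once the "fence" is known to have signalled. */
	while (!atomic_load_explicit(&fence_signalled, memory_order_acquire))
		;
	return unknown_state;	/* safe: ordered after the acquire */
}
```

Without the release/acquire (or barrier/signal) pairing, a concurrent reader could observe the fence as signalled yet still see a stale `unknown_state`.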
+3
drivers/gpu/drm/i915/gem/i915_gem_object.h
··· 524 524 struct dma_fence **fence); 525 525 int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, 526 526 bool intr); 527 + bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj); 527 528 528 529 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, 529 530 unsigned int cache_level); ··· 617 616 618 617 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, 619 618 enum intel_memory_type type); 619 + 620 + bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj); 620 621 621 622 int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, 622 623 size_t size, struct intel_memory_region *mr,
+18
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
··· 548 548 bool ttm_shrinkable; 549 549 550 550 /** 551 + * @unknown_state: Indicate that the object is effectively 552 + * borked. This is write-once and set if we somehow encounter a 553 + * fatal error when moving/clearing the pages, and we are not 554 + * able to fallback to memcpy/memset, like on small-BAR systems. 555 + * The GPU should also be wedged (or in the process) at this 556 + * point. 557 + * 558 + * Only valid to read this after acquiring the dma-resv lock and 559 + * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled, 560 + * or if we otherwise know that the moving fence has signalled, 561 + * and we are certain the pages underneath are valid for 562 + * immediate access (under normal operation), like just prior to 563 + * binding the object or when setting up the CPU fault handler. 564 + * See i915_gem_object_has_unknown_state(); 565 + */ 566 + bool unknown_state; 567 + 568 + /** 551 569 * Priority list of potential placements for this object. 552 570 */ 553 571 struct intel_memory_region **placements;
+2
drivers/gpu/drm/i915/gem/i915_gem_region.c
··· 60 60 if (page_size) 61 61 default_page_size = page_size; 62 62 63 + /* We should be able to fit a page within an sg entry */ 64 + GEM_BUG_ON(overflows_type(default_page_size, u32)); 63 65 GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); 64 66 GEM_BUG_ON(default_page_size < PAGE_SIZE); 65 67
+34 -21
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
··· 266 266 .release = i915_ttm_tt_release 267 267 }; 268 268 269 - static inline bool 270 - i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) 271 - { 272 - bool lmem_placement = false; 273 - int i; 274 - 275 - for (i = 0; i < obj->mm.n_placements; i++) { 276 - /* Compression is not allowed for the objects with smem placement */ 277 - if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) 278 - return false; 279 - if (!lmem_placement && 280 - obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL) 281 - lmem_placement = true; 282 - } 283 - 284 - return lmem_placement; 285 - } 286 - 287 269 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, 288 270 uint32_t page_flags) 289 271 { ··· 602 620 struct ttm_resource *res) 603 621 { 604 622 struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); 623 + u32 page_alignment; 605 624 606 625 if (!i915_ttm_gtt_binds_lmem(res)) 607 626 return i915_ttm_tt_get_st(bo->ttm); 627 + 628 + page_alignment = bo->page_alignment << PAGE_SHIFT; 629 + if (!page_alignment) 630 + page_alignment = obj->mm.region->min_page_size; 608 631 609 632 /* 610 633 * If CPU mapping differs, we need to add the ttm_tt pages to ··· 621 634 struct i915_refct_sgt *rsgt; 622 635 623 636 rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, 624 - res); 637 + res, 638 + page_alignment); 625 639 if (IS_ERR(rsgt)) 626 640 return rsgt; 627 641 ··· 631 643 return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); 632 644 } 633 645 634 - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); 646 + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, 647 + page_alignment); 635 648 } 636 649 637 650 static int i915_ttm_truncate(struct drm_i915_gem_object *obj) ··· 664 675 i915_ttm_purge(obj); 665 676 } 666 677 667 - static bool i915_ttm_resource_mappable(struct ttm_resource *res) 678 + /** 679 + * i915_ttm_resource_mappable - Return true if the ttm resource is CPU 680 + * accessible. 681 + * @res: The TTM resource to check. 
682 + * 683 + * This is interesting on small-BAR systems where we may encounter lmem objects 684 + * that can't be accessed via the CPU. 685 + */ 686 + bool i915_ttm_resource_mappable(struct ttm_resource *res) 668 687 { 669 688 struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); 670 689 ··· 684 687 685 688 static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) 686 689 { 690 + struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo); 691 + bool unknown_state; 692 + 693 + if (!obj) 694 + return -EINVAL; 695 + 696 + if (!kref_get_unless_zero(&obj->base.refcount)) 697 + return -EINVAL; 698 + 699 + assert_object_held(obj); 700 + 701 + unknown_state = i915_gem_object_has_unknown_state(obj); 702 + i915_gem_object_put(obj); 703 + if (unknown_state) 704 + return -EINVAL; 705 + 687 706 if (!i915_ttm_cpu_maps_iomem(mem)) 688 707 return 0; 689 708
+3
drivers/gpu/drm/i915/gem/i915_gem_ttm.h
··· 92 92 /* Once / if we support GGTT, this is also false for cached ttm_tts */ 93 93 return mem->mem_type != I915_PL_SYSTEM; 94 94 } 95 + 96 + bool i915_ttm_resource_mappable(struct ttm_resource *res); 97 + 95 98 #endif
+86 -13
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
··· 33 33 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 34 34 static bool fail_gpu_migration; 35 35 static bool fail_work_allocation; 36 + static bool ban_memcpy; 36 37 37 38 void i915_ttm_migrate_set_failure_modes(bool gpu_migration, 38 39 bool work_allocation) 39 40 { 40 41 fail_gpu_migration = gpu_migration; 41 42 fail_work_allocation = work_allocation; 43 + } 44 + 45 + void i915_ttm_migrate_set_ban_memcpy(bool ban) 46 + { 47 + ban_memcpy = ban; 42 48 } 43 49 #endif 44 50 ··· 264 258 * from the callback for lockdep reasons. 265 259 * @cb: Callback for the accelerated migration fence. 266 260 * @arg: The argument for the memcpy functionality. 261 + * @i915: The i915 pointer. 262 + * @obj: The GEM object. 263 + * @memcpy_allowed: Instead of processing the @arg, and falling back to memcpy 264 + * or memset, we wedge the device and set the @obj unknown_state, to prevent 265 + * further access to the object with the CPU or GPU. On some devices we might 266 + * only be permitted to use the blitter engine for such operations. 267 267 */ 268 268 struct i915_ttm_memcpy_work { 269 269 struct dma_fence fence; 270 270 struct work_struct work; 271 - /* The fence lock */ 272 271 spinlock_t lock; 273 272 struct irq_work irq_work; 274 273 struct dma_fence_cb cb; 275 274 struct i915_ttm_memcpy_arg arg; 275 + struct drm_i915_private *i915; 276 + struct drm_i915_gem_object *obj; 277 + bool memcpy_allowed; 276 278 }; 277 279 278 280 static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg) ··· 331 317 struct i915_ttm_memcpy_work *copy_work = 332 318 container_of(work, typeof(*copy_work), work); 333 319 struct i915_ttm_memcpy_arg *arg = &copy_work->arg; 334 - bool cookie = dma_fence_begin_signalling(); 320 + bool cookie; 335 321 336 - i915_ttm_move_memcpy(arg); 322 + /* 323 + * FIXME: We need to take a closer look here. We should be able to plonk 324 + * this into the fence critical section. 
325 + */ 326 + if (!copy_work->memcpy_allowed) { 327 + struct intel_gt *gt; 328 + unsigned int id; 329 + 330 + for_each_gt(gt, copy_work->i915, id) 331 + intel_gt_set_wedged(gt); 332 + } 333 + 334 + cookie = dma_fence_begin_signalling(); 335 + 336 + if (copy_work->memcpy_allowed) { 337 + i915_ttm_move_memcpy(arg); 338 + } else { 339 + /* 340 + * Prevent further use of the object. Any future GTT binding or 341 + * CPU access is not allowed once we signal the fence. Outside 342 + * of the fence critical section, we then also wedge the gpu 343 + * to indicate the device is not functional. 344 + * 345 + * The below dma_fence_signal() is our write-memory-barrier. 346 + */ 347 + copy_work->obj->mm.unknown_state = true; 348 + } 349 + 350 + dma_fence_end_signalling(cookie); 351 + 352 + dma_fence_signal(&copy_work->fence); 353 + 354 + i915_ttm_memcpy_release(arg); 355 + i915_gem_object_put(copy_work->obj); 356 + dma_fence_put(&copy_work->fence); 357 + } 358 + 359 + ··· 378 336 379 337 dma_fence_signal(&copy_work->fence); 380 338 i915_ttm_memcpy_release(arg); 339 + i915_gem_object_put(copy_work->obj); 381 340 dma_fence_put(&copy_work->fence); 382 341 } 383 342 ··· 432 389 return &work->fence; 433 390 } 434 391 392 + static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo, 393 + struct ttm_resource *dst_mem) 394 + { 395 + if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo))) 396 + return false; 397 + 398 + if (!(i915_ttm_resource_mappable(bo->resource) && 399 + i915_ttm_resource_mappable(dst_mem))) 400 + return false; 401 + 402 + return I915_SELFTEST_ONLY(ban_memcpy) ?
false : true; 403 + } 404 + 435 405 static struct dma_fence * 436 406 __i915_ttm_move(struct ttm_buffer_object *bo, 437 407 const struct ttm_operation_ctx *ctx, bool clear, ··· 452 396 struct i915_refct_sgt *dst_rsgt, bool allow_accel, 453 397 const struct i915_deps *move_deps) 454 398 { 399 + const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem); 400 + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); 401 + struct drm_i915_private *i915 = to_i915(bo->base.dev); 455 402 struct i915_ttm_memcpy_work *copy_work = NULL; 456 403 struct i915_ttm_memcpy_arg _arg, *arg = &_arg; 457 404 struct dma_fence *fence = ERR_PTR(-EINVAL); ··· 482 423 copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL); 483 424 484 425 if (copy_work) { 426 + copy_work->i915 = i915; 427 + copy_work->memcpy_allowed = memcpy_allowed; 428 + copy_work->obj = i915_gem_object_get(obj); 485 429 arg = &copy_work->arg; 486 - i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, 487 - dst_rsgt); 430 + if (memcpy_allowed) 431 + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, 432 + dst_ttm, dst_rsgt); 433 + 488 434 fence = i915_ttm_memcpy_work_arm(copy_work, dep); 489 435 } else { 490 436 dma_fence_wait(dep, false); ··· 514 450 } 515 451 516 452 /* Error intercept failed or no accelerated migration to start with */ 517 - if (!copy_work) 518 - i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, 519 - dst_rsgt); 520 - i915_ttm_move_memcpy(arg); 521 - i915_ttm_memcpy_release(arg); 453 + 454 + if (memcpy_allowed) { 455 + if (!copy_work) 456 + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, 457 + dst_rsgt); 458 + i915_ttm_move_memcpy(arg); 459 + i915_ttm_memcpy_release(arg); 460 + } 461 + if (copy_work) 462 + i915_gem_object_put(copy_work->obj); 522 463 kfree(copy_work); 523 464 524 - return NULL; 465 + return memcpy_allowed ? 
NULL : ERR_PTR(-EIO); 525 466 out: 526 467 if (!fence && copy_work) { 527 468 i915_ttm_memcpy_release(arg); 469 + i915_gem_object_put(copy_work->obj); 528 470 kfree(copy_work); 529 471 } 530 472 ··· 609 539 } 610 540 611 541 if (migration_fence) { 612 - ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, 613 - true, dst_mem); 542 + if (I915_SELFTEST_ONLY(evict && fail_gpu_migration)) 543 + ret = -EIO; /* never feed non-migrate fences into ttm */ 544 + else 545 + ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, 546 + true, dst_mem); 614 547 if (ret) { 615 548 dma_fence_wait(migration_fence, false); 616 549 ttm_bo_move_sync_cleanup(bo, dst_mem);
+1
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
··· 22 22 23 23 I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, 24 24 bool work_allocation)); 25 + I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_ban_memcpy(bool ban)); 25 26 26 27 int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, 27 28 struct drm_i915_gem_object *src,
+34
drivers/gpu/drm/i915/gem/i915_gem_wait.c
··· 9 9 #include <linux/jiffies.h> 10 10 11 11 #include "gt/intel_engine.h" 12 + #include "gt/intel_rps.h" 12 13 13 14 #include "i915_gem_ioctls.h" 14 15 #include "i915_gem_object.h" ··· 32 31 timeout); 33 32 } 34 33 34 + static void 35 + i915_gem_object_boost(struct dma_resv *resv, unsigned int flags) 36 + { 37 + struct dma_resv_iter cursor; 38 + struct dma_fence *fence; 39 + 40 + /* 41 + * Prescan all fences for potential boosting before we begin waiting. 42 + * 43 + * When we wait, we wait on outstanding fences serially. If the 44 + * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced 45 + * form 1:2, then as we look at each wait in turn we see that each 46 + * request is currently executing and not worthy of boosting. But if 47 + * we only happen to look at the final fence in the sequence (because 48 + * of request coalescing or splitting between read/write arrays by 49 + * the iterator), then we would boost. As such our decision to boost 50 + * or not is delicately balanced on the order we wait on fences. 51 + * 52 + * So instead of looking for boosts sequentially, look for all boosts 53 + * upfront and then wait on the outstanding fences. 54 + */ 55 + 56 + dma_resv_iter_begin(&cursor, resv, 57 + dma_resv_usage_rw(flags & I915_WAIT_ALL)); 58 + dma_resv_for_each_fence_unlocked(&cursor, fence) 59 + if (dma_fence_is_i915(fence) && 60 + !i915_request_started(to_request(fence))) 61 + intel_rps_boost(to_request(fence)); 62 + dma_resv_iter_end(&cursor); 63 + } 64 + 35 65 static long 36 66 i915_gem_object_wait_reservation(struct dma_resv *resv, 37 67 unsigned int flags, ··· 71 39 struct dma_resv_iter cursor; 72 40 struct dma_fence *fence; 73 41 long ret = timeout ?: 1; 42 + 43 + i915_gem_object_boost(resv, flags); 74 44 75 45 dma_resv_iter_begin(&cursor, resv, 76 46 dma_resv_usage_rw(flags & I915_WAIT_ALL));
+6 -1
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
··· 1623 1623 struct file *file; 1624 1624 unsigned int flags = PIN_USER; 1625 1625 unsigned int n; 1626 + intel_wakeref_t wf; 1626 1627 bool should_swap; 1627 1628 int err; 1628 1629 ··· 1660 1659 goto out_put; 1661 1660 } 1662 1661 1662 + wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */ 1663 + 1663 1664 err = i915_vma_pin(vma, 0, 0, flags); 1664 1665 if (err) 1665 - goto out_put; 1666 + goto out_wf; 1666 1667 1667 1668 if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { 1668 1669 pr_info("failed to allocate THP, finishing test early\n"); ··· 1735 1732 1736 1733 out_unpin: 1737 1734 i915_vma_unpin(vma); 1735 + out_wf: 1736 + intel_runtime_pm_put(&i915->runtime_pm, wf); 1738 1737 out_put: 1739 1738 i915_gem_object_put(obj); 1740 1739 out_vm:
+119 -22
drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
··· 9 9 10 10 #include "i915_deps.h" 11 11 12 + #include "selftests/igt_reset.h" 12 13 #include "selftests/igt_spinner.h" 13 14 14 15 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, ··· 110 109 111 110 static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, 112 111 struct drm_i915_gem_object *obj, 113 - struct i915_vma *vma) 112 + struct i915_vma *vma, 113 + bool silent_migrate) 114 114 { 115 115 int err; 116 116 ··· 140 138 if (i915_gem_object_is_lmem(obj)) { 141 139 err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); 142 140 if (err) { 143 - pr_err("Object failed migration to smem\n"); 141 + if (!silent_migrate) 142 + pr_err("Object failed migration to smem\n"); 144 143 if (err) 145 144 return err; 146 145 } ··· 159 156 } else { 160 157 err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0); 161 158 if (err) { 162 - pr_err("Object failed migration to lmem\n"); 159 + if (!silent_migrate) 160 + pr_err("Object failed migration to lmem\n"); 163 161 if (err) 164 162 return err; 165 163 } ··· 183 179 struct i915_address_space *vm, 184 180 struct i915_deps *deps, 185 181 struct igt_spinner *spin, 186 - struct dma_fence *spin_fence) 182 + struct dma_fence *spin_fence, 183 + bool borked_migrate) 187 184 { 188 185 struct drm_i915_private *i915 = gt->i915; 189 186 struct drm_i915_gem_object *obj; ··· 247 242 */ 248 243 for (i = 1; i <= 5; ++i) { 249 244 for_i915_gem_ww(&ww, err, true) 250 - err = lmem_pages_migrate_one(&ww, obj, vma); 245 + err = lmem_pages_migrate_one(&ww, obj, vma, 246 + borked_migrate); 251 247 if (err) 252 248 goto out_put; 253 249 } ··· 289 283 290 284 static int igt_lmem_pages_failsafe_migrate(void *arg) 291 285 { 292 - int fail_gpu, fail_alloc, ret; 286 + int fail_gpu, fail_alloc, ban_memcpy, ret; 293 287 struct intel_gt *gt = arg; 294 288 295 289 for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { 296 290 for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { 297 - pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", 
298 - fail_gpu, fail_alloc); 299 - i915_ttm_migrate_set_failure_modes(fail_gpu, 300 - fail_alloc); 301 - ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL); 302 - if (ret) 303 - goto out_err; 291 + for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) { 292 + pr_info("Simulated failure modes: gpu: %d, alloc:%d, ban_memcpy: %d\n", 293 + fail_gpu, fail_alloc, ban_memcpy); 294 + i915_ttm_migrate_set_ban_memcpy(ban_memcpy); 295 + i915_ttm_migrate_set_failure_modes(fail_gpu, 296 + fail_alloc); 297 + ret = __igt_lmem_pages_migrate(gt, NULL, NULL, 298 + NULL, NULL, 299 + ban_memcpy && 300 + fail_gpu); 301 + 302 + if (ban_memcpy && fail_gpu) { 303 + struct intel_gt *__gt; 304 + unsigned int id; 305 + 306 + if (ret != -EIO) { 307 + pr_err("expected -EIO, got (%d)\n", ret); 308 + ret = -EINVAL; 309 + } else { 310 + ret = 0; 311 + } 312 + 313 + for_each_gt(__gt, gt->i915, id) { 314 + intel_wakeref_t wakeref; 315 + bool wedged; 316 + 317 + mutex_lock(&__gt->reset.mutex); 318 + wedged = test_bit(I915_WEDGED, &__gt->reset.flags); 319 + mutex_unlock(&__gt->reset.mutex); 320 + 321 + if (fail_gpu && !fail_alloc) { 322 + if (!wedged) { 323 + pr_err("gt(%u) not wedged\n", id); 324 + ret = -EINVAL; 325 + continue; 326 + } 327 + } else if (wedged) { 328 + pr_err("gt(%u) incorrectly wedged\n", id); 329 + ret = -EINVAL; 330 + } else { 331 + continue; 332 + } 333 + 334 + wakeref = intel_runtime_pm_get(__gt->uncore->rpm); 335 + igt_global_reset_lock(__gt); 336 + intel_gt_reset(__gt, ALL_ENGINES, NULL); 337 + igt_global_reset_unlock(__gt); 338 + intel_runtime_pm_put(__gt->uncore->rpm, wakeref); 339 + } 340 + if (ret) 341 + goto out_err; 342 + } 343 + } 304 344 } 305 345 } 306 346 307 347 out_err: 308 348 i915_ttm_migrate_set_failure_modes(false, false); 349 + i915_ttm_migrate_set_ban_memcpy(false); 309 350 return ret; 310 351 } 311 352 ··· 423 370 goto out_ce; 424 371 425 372 err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin, 426 - spin_fence); 373 + spin_fence, false); 427 
374 i915_deps_fini(&deps); 428 375 dma_fence_put(spin_fence); 429 376 if (err) ··· 447 394 #define ASYNC_FAIL_ALLOC 1 448 395 static int igt_lmem_async_migrate(void *arg) 449 396 { 450 - int fail_gpu, fail_alloc, ret; 397 + int fail_gpu, fail_alloc, ban_memcpy, ret; 451 398 struct intel_gt *gt = arg; 452 399 453 400 for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { 454 401 for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) { 455 - pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", 456 - fail_gpu, fail_alloc); 457 - i915_ttm_migrate_set_failure_modes(fail_gpu, 458 - fail_alloc); 459 - ret = igt_async_migrate(gt); 460 - if (ret) 461 - goto out_err; 402 + for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) { 403 + pr_info("Simulated failure modes: gpu: %d, alloc: %d, ban_memcpy: %d\n", 404 + fail_gpu, fail_alloc, ban_memcpy); 405 + i915_ttm_migrate_set_ban_memcpy(ban_memcpy); 406 + i915_ttm_migrate_set_failure_modes(fail_gpu, 407 + fail_alloc); 408 + ret = igt_async_migrate(gt); 409 + 410 + if (fail_gpu && ban_memcpy) { 411 + struct intel_gt *__gt; 412 + unsigned int id; 413 + 414 + if (ret != -EIO) { 415 + pr_err("expected -EIO, got (%d)\n", ret); 416 + ret = -EINVAL; 417 + } else { 418 + ret = 0; 419 + } 420 + 421 + for_each_gt(__gt, gt->i915, id) { 422 + intel_wakeref_t wakeref; 423 + bool wedged; 424 + 425 + mutex_lock(&__gt->reset.mutex); 426 + wedged = test_bit(I915_WEDGED, &__gt->reset.flags); 427 + mutex_unlock(&__gt->reset.mutex); 428 + 429 + if (fail_gpu && !fail_alloc) { 430 + if (!wedged) { 431 + pr_err("gt(%u) not wedged\n", id); 432 + ret = -EINVAL; 433 + continue; 434 + } 435 + } else if (wedged) { 436 + pr_err("gt(%u) incorrectly wedged\n", id); 437 + ret = -EINVAL; 438 + } else { 439 + continue; 440 + } 441 + 442 + wakeref = intel_runtime_pm_get(__gt->uncore->rpm); 443 + igt_global_reset_lock(__gt); 444 + intel_gt_reset(__gt, ALL_ENGINES, NULL); 445 + igt_global_reset_unlock(__gt); 446 + intel_runtime_pm_put(__gt->uncore->rpm, 
wakeref); 447 + } 448 + } 449 + if (ret) 450 + goto out_err; 451 + } 462 452 } 463 453 } 464 454 465 455 out_err: 466 456 i915_ttm_migrate_set_failure_modes(false, false); 457 + i915_ttm_migrate_set_ban_memcpy(false); 467 458 return ret; 468 459 } 469 460
+85 -2
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
··· 10 10 #include "gem/i915_gem_internal.h" 11 11 #include "gem/i915_gem_region.h" 12 12 #include "gem/i915_gem_ttm.h" 13 + #include "gem/i915_gem_ttm_move.h" 13 14 #include "gt/intel_engine_pm.h" 14 15 #include "gt/intel_gpu_commands.h" 15 16 #include "gt/intel_gt.h" ··· 22 21 #include "i915_selftest.h" 23 22 #include "selftests/i915_random.h" 24 23 #include "selftests/igt_flush_test.h" 24 + #include "selftests/igt_reset.h" 25 25 #include "selftests/igt_mmap.h" 26 26 27 27 struct tile { ··· 981 979 }; 982 980 int i; 983 981 982 + if (mr->private) 983 + continue; 984 + 984 985 for (i = 0; i < ARRAY_SIZE(sizes); i++) { 985 986 struct drm_i915_gem_object *obj; 986 987 int err; ··· 1165 1160 #define IGT_MMAP_MIGRATE_FILL (1 << 1) 1166 1161 #define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2) 1167 1162 #define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3) 1163 + #define IGT_MMAP_MIGRATE_FAIL_GPU (1 << 4) 1168 1164 static int __igt_mmap_migrate(struct intel_memory_region **placements, 1169 1165 int n_placements, 1170 1166 struct intel_memory_region *expected_mr, ··· 1227 1221 expand32(POISON_INUSE), &rq); 1228 1222 i915_gem_object_unpin_pages(obj); 1229 1223 if (rq) { 1230 - dma_resv_add_fence(obj->base.resv, &rq->fence, 1231 - DMA_RESV_USAGE_KERNEL); 1224 + err = dma_resv_reserve_fences(obj->base.resv, 1); 1225 + if (!err) 1226 + dma_resv_add_fence(obj->base.resv, &rq->fence, 1227 + DMA_RESV_USAGE_KERNEL); 1232 1228 i915_request_put(rq); 1233 1229 } 1234 1230 i915_gem_object_unlock(obj); ··· 1240 1232 if (flags & IGT_MMAP_MIGRATE_EVICTABLE) 1241 1233 igt_make_evictable(&objects); 1242 1234 1235 + if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { 1236 + err = i915_gem_object_lock(obj, NULL); 1237 + if (err) 1238 + goto out_put; 1239 + 1240 + /* 1241 + * Ensure we only simulate the gpu failure when faulting the
1243 + */ 1244 + err = i915_gem_object_wait_moving_fence(obj, true); 1245 + i915_gem_object_unlock(obj); 1246 + if (err) 1247 + goto out_put; 1248 + i915_ttm_migrate_set_failure_modes(true, false); 1249 + } 1250 + 1243 1251 err = ___igt_mmap_migrate(i915, obj, addr, 1244 1252 flags & IGT_MMAP_MIGRATE_UNFAULTABLE); 1253 + 1245 1254 if (!err && obj->mm.region != expected_mr) { 1246 1255 pr_err("%s region mismatch %s\n", __func__, expected_mr->name); 1247 1256 err = -EINVAL; 1257 + } 1258 + 1259 + if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { 1260 + struct intel_gt *gt; 1261 + unsigned int id; 1262 + 1263 + i915_ttm_migrate_set_failure_modes(false, false); 1264 + 1265 + for_each_gt(gt, i915, id) { 1266 + intel_wakeref_t wakeref; 1267 + bool wedged; 1268 + 1269 + mutex_lock(&gt->reset.mutex); 1270 + wedged = test_bit(I915_WEDGED, &gt->reset.flags); 1271 + mutex_unlock(&gt->reset.mutex); 1272 + if (!wedged) { 1273 + pr_err("gt(%u) not wedged\n", id); 1274 + err = -EINVAL; 1275 + continue; 1276 + } 1277 + 1278 + wakeref = intel_runtime_pm_get(gt->uncore->rpm); 1279 + igt_global_reset_lock(gt); 1280 + intel_gt_reset(gt, ALL_ENGINES, NULL); 1281 + igt_global_reset_unlock(gt); 1282 + intel_runtime_pm_put(gt->uncore->rpm, wakeref); 1283 + } 1284 + 1285 + if (!i915_gem_object_has_unknown_state(obj)) { 1286 + pr_err("object missing unknown_state\n"); 1287 + err = -EINVAL; 1288 + } 1248 1289 } 1249 1290 1250 1291 out_put: ··· 1375 1318 err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr, 1376 1319 IGT_MMAP_MIGRATE_TOPDOWN | 1377 1320 IGT_MMAP_MIGRATE_FILL | 1321 + IGT_MMAP_MIGRATE_UNFAULTABLE); 1322 + if (err) 1323 + goto out_io_size; 1324 + 1325 + /* 1326 + * Allocate in the non-mappable portion, but force migrating to 1327 + * the mappable portion on fault (LMEM -> LMEM). 
We then also 1328 + * simulate a gpu error when moving the pages when faulting the 1329 + * pages, which should result in wedging the gpu and returning 1330 + * SIGBUS in the fault handler, since we can't fallback to 1331 + * memcpy. 1332 + */ 1333 + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr, 1334 + IGT_MMAP_MIGRATE_TOPDOWN | 1335 + IGT_MMAP_MIGRATE_FILL | 1336 + IGT_MMAP_MIGRATE_EVICTABLE | 1337 + IGT_MMAP_MIGRATE_FAIL_GPU | 1378 1338 IGT_MMAP_MIGRATE_UNFAULTABLE); 1379 1339 out_io_size: 1380 1340 mr->io_size = saved_io_size; ··· 1508 1434 for_each_memory_region(mr, i915, id) { 1509 1435 struct drm_i915_gem_object *obj; 1510 1436 int err; 1437 + 1438 + if (mr->private) 1439 + continue; 1511 1440 1512 1441 obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); 1513 1442 if (obj == ERR_PTR(-ENODEV)) ··· 1656 1579 for_each_memory_region(mr, i915, id) { 1657 1580 struct drm_i915_gem_object *obj; 1658 1581 int err; 1582 + 1583 + if (mr->private) 1584 + continue; 1659 1585 1660 1586 obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); 1661 1587 if (obj == ERR_PTR(-ENODEV)) ··· 1806 1726 for_each_memory_region(mr, i915, id) { 1807 1727 struct drm_i915_gem_object *obj; 1808 1728 int err; 1729 + 1730 + if (mr->private) 1731 + continue; 1809 1732 1810 1733 obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); 1811 1734 if (obj == ERR_PTR(-ENODEV))
+2 -1
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
··· 399 399 * the request as it may have completed and raised the interrupt as 400 400 * we were attaching it into the lists. 401 401 */ 402 - irq_work_queue(&b->irq_work); 402 + if (!b->irq_armed || __i915_request_is_complete(rq)) 403 + irq_work_queue(&b->irq_work); 403 404 } 404 405 405 406 bool i915_request_enable_breadcrumb(struct i915_request *rq)
+10 -24
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 1517 1517 struct intel_instdone *instdone) 1518 1518 { 1519 1519 struct drm_i915_private *i915 = engine->i915; 1520 - const struct sseu_dev_info *sseu = &engine->gt->info.sseu; 1521 1520 struct intel_uncore *uncore = engine->uncore; 1522 1521 u32 mmio_base = engine->mmio_base; 1523 1522 int slice; ··· 1541 1542 intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); 1542 1543 } 1543 1544 1544 - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { 1545 - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { 1546 - instdone->sampler[slice][subslice] = 1547 - intel_gt_mcr_read(engine->gt, 1548 - GEN7_SAMPLER_INSTDONE, 1549 - slice, subslice); 1550 - instdone->row[slice][subslice] = 1551 - intel_gt_mcr_read(engine->gt, 1552 - GEN7_ROW_INSTDONE, 1553 - slice, subslice); 1554 - } 1555 - } else { 1556 - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { 1557 - instdone->sampler[slice][subslice] = 1558 - intel_gt_mcr_read(engine->gt, 1559 - GEN7_SAMPLER_INSTDONE, 1560 - slice, subslice); 1561 - instdone->row[slice][subslice] = 1562 - intel_gt_mcr_read(engine->gt, 1563 - GEN7_ROW_INSTDONE, 1564 - slice, subslice); 1565 - } 1545 + for_each_ss_steering(iter, engine->gt, slice, subslice) { 1546 + instdone->sampler[slice][subslice] = 1547 + intel_gt_mcr_read(engine->gt, 1548 + GEN7_SAMPLER_INSTDONE, 1549 + slice, subslice); 1550 + instdone->row[slice][subslice] = 1551 + intel_gt_mcr_read(engine->gt, 1552 + GEN7_ROW_INSTDONE, 1553 + slice, subslice); 1566 1554 } 1567 1555 1568 1556 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { 1569 - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) 1557 + for_each_ss_steering(iter, engine->gt, slice, subslice) 1570 1558 instdone->geom_svg[slice][subslice] = 1571 1559 intel_gt_mcr_read(engine->gt, 1572 1560 XEHPG_INSTDONE_GEOM_SVG,
-22
drivers/gpu/drm/i915/gt/intel_engine_types.h
··· 647 647 return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 648 648 } 649 649 650 - #define instdone_has_slice(dev_priv___, sseu___, slice___) \ 651 - ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) 652 - 653 - #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ 654 - (GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \ 655 - intel_sseu_has_subslice(sseu__, 0, subslice__)) 656 - 657 - #define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ 658 - for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ 659 - (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ 660 - (slice_) += ((subslice_) == 0)) \ 661 - for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ 662 - (instdone_has_subslice(dev_priv_, sseu_, slice_, \ 663 - subslice_))) 664 - 665 - #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ 666 - for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ 667 - (iter_) < GEN_SS_MASK_SIZE; \ 668 - (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ 669 - (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ 670 - for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) 671 - 672 650 #endif /* __INTEL_ENGINE_TYPES_H__ */
+14 -1
drivers/gpu/drm/i915/gt/intel_gt.c
··· 952 952 mutex_lock(&gt->tlb_invalidate_lock); 953 953 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 954 954 955 + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ 956 + 957 + for_each_engine(engine, gt, id) { 958 + struct reg_and_bit rb; 959 + 960 + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); 961 + if (!i915_mmio_reg_offset(rb.reg)) 962 + continue; 963 + 964 + intel_uncore_write_fw(uncore, rb.reg, rb.bit); 965 + } 966 + 967 + spin_unlock_irq(&uncore->lock); 968 + 955 969 for_each_engine(engine, gt, id) { 956 970 /* 957 971 * HW architecture suggest typical invalidation time at 40us, ··· 980 966 if (!i915_mmio_reg_offset(rb.reg)) 981 967 continue; 982 968 983 - intel_uncore_write_fw(uncore, rb.reg, rb.bit); 984 969 if (__intel_wait_for_register_fw(uncore, 985 970 rb.reg, rb.bit, 0, 986 971 timeout_us, timeout_ms,
+25
drivers/gpu/drm/i915/gt/intel_gt_mcr.c
··· 495 495 } 496 496 } 497 497 498 + /** 499 + * intel_gt_mcr_get_ss_steering - returns the group/instance steering for a SS 500 + * @gt: GT structure 501 + * @dss: DSS ID to obtain steering for 502 + * @group: pointer to storage for steering group ID 503 + * @instance: pointer to storage for steering instance ID 504 + * 505 + * Returns the steering IDs (via the @group and @instance parameters) that 506 + * correspond to a specific subslice/DSS ID. 507 + */ 508 + void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, 509 + unsigned int *group, unsigned int *instance) 510 + { 511 + if (IS_PONTEVECCHIO(gt->i915)) { 512 + *group = dss / GEN_DSS_PER_CSLICE; 513 + *instance = dss % GEN_DSS_PER_CSLICE; 514 + } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { 515 + *group = dss / GEN_DSS_PER_GSLICE; 516 + *instance = dss % GEN_DSS_PER_GSLICE; 517 + } else { 518 + *group = dss / GEN_MAX_SS_PER_HSW_SLICE; 519 + *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; 520 + return; 521 + } 522 + }
+24
drivers/gpu/drm/i915/gt/intel_gt_mcr.h
··· 31 31 void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, 32 32 bool dump_table); 33 33 34 + void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, 35 + unsigned int *group, unsigned int *instance); 36 + 37 + /* 38 + * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice 39 + * presence is determined by using the group/instance as direct lookups in the 40 + * slice/subslice topology. On Xe_HP and beyond, the steering is unrelated to 41 + * the topology, so we lookup the DSS ID directly in "slice 0." 42 + */ 43 + #define _HAS_SS(ss_, gt_, group_, instance_) ( \ 44 + GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \ 45 + intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \ 46 + intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_)) 47 + 48 + /* 49 + * Loop over each subslice/DSS and determine the group and instance IDs that 50 + * should be used to steer MCR accesses toward this DSS. 51 + */ 52 + #define for_each_ss_steering(ss_, gt_, group_, instance_) \ 53 + for (ss_ = 0, intel_gt_mcr_get_ss_steering(gt_, 0, &group_, &instance_); \ 54 + ss_ < I915_MAX_SS_FUSE_BITS; \ 55 + ss_++, intel_gt_mcr_get_ss_steering(gt_, ss_, &group_, &instance_)) \ 56 + for_each_if(_HAS_SS(ss_, gt_, group_, instance_)) 57 + 34 58 #endif /* __INTEL_GT_MCR__ */
+7
drivers/gpu/drm/i915/gt/intel_gt_regs.h
··· 371 371 #define GEN9_WM_CHICKEN3 _MMIO(0x5588) 372 372 #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) 373 373 374 + #define CHICKEN_RASTER_1 _MMIO(0x6204) 375 + #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) 376 + 374 377 #define VFLSKPD _MMIO(0x62a8) 375 378 #define DIS_OVER_FETCH_CACHE REG_BIT(1) 376 379 #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) ··· 921 918 #define GEN7_L3CNTLREG1 _MMIO(0xb01c) 922 919 #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C 923 920 #define GEN7_L3AGDIS (1 << 19) 921 + 922 + #define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) 923 + #define XEHPC_OVRLSCCC REG_BIT(0) 924 + 924 925 #define GEN7_L3CNTLREG2 _MMIO(0xb020) 925 926 926 927 /* MOCS (Memory Object Control State) registers */
+103 -6
drivers/gpu/drm/i915/gt/intel_region_lmem.c
··· 15 15 #include "gt/intel_gt_mcr.h" 16 16 #include "gt/intel_gt_regs.h" 17 17 18 + static void _release_bars(struct pci_dev *pdev) 19 + { 20 + int resno; 21 + 22 + for (resno = PCI_STD_RESOURCES; resno < PCI_STD_RESOURCE_END; resno++) { 23 + if (pci_resource_len(pdev, resno)) 24 + pci_release_resource(pdev, resno); 25 + } 26 + } 27 + 28 + static void 29 + _resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size) 30 + { 31 + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); 32 + int bar_size = pci_rebar_bytes_to_size(size); 33 + int ret; 34 + 35 + _release_bars(pdev); 36 + 37 + ret = pci_resize_resource(pdev, resno, bar_size); 38 + if (ret) { 39 + drm_info(&i915->drm, "Failed to resize BAR%d to %dM (%pe)\n", 40 + resno, 1 << bar_size, ERR_PTR(ret)); 41 + return; 42 + } 43 + 44 + drm_info(&i915->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); 45 + } 46 + 47 + #define LMEM_BAR_NUM 2 48 + static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) 49 + { 50 + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); 51 + struct pci_bus *root = pdev->bus; 52 + struct resource *root_res; 53 + resource_size_t rebar_size; 54 + resource_size_t current_size; 55 + u32 pci_cmd; 56 + int i; 57 + 58 + current_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); 59 + 60 + if (i915->params.lmem_bar_size) { 61 + u32 bar_sizes; 62 + 63 + rebar_size = i915->params.lmem_bar_size * 64 + (resource_size_t)SZ_1M; 65 + bar_sizes = pci_rebar_get_possible_sizes(pdev, 66 + LMEM_BAR_NUM); 67 + 68 + if (rebar_size == current_size) 69 + return; 70 + 71 + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || 72 + rebar_size >= roundup_pow_of_two(lmem_size)) { 73 + rebar_size = lmem_size; 74 + 75 + drm_info(&i915->drm, 76 + "Given bar size is not within supported size, setting it to default: %llu\n", 77 + (u64)lmem_size >> 20); 78 + } 79 + } else { 80 + rebar_size = current_size; 81 + 82 + if (rebar_size != 
roundup_pow_of_two(lmem_size)) 83 + rebar_size = lmem_size; 84 + else 85 + return; 86 + } 87 + 88 + /* Find out if root bus contains 64bit memory addressing */ 89 + while (root->parent) 90 + root = root->parent; 91 + 92 + pci_bus_for_each_resource(root, root_res, i) { 93 + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 94 + root_res->start > 0x100000000ull) 95 + break; 96 + } 97 + 98 + /* pci_resize_resource will fail anyways */ 99 + if (!root_res) { 100 + drm_info(&i915->drm, "Can't resize LMEM BAR - platform support is missing\n"); 101 + return; 102 + } 103 + 104 + /* First disable PCI memory decoding references */ 105 + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); 106 + pci_write_config_dword(pdev, PCI_COMMAND, 107 + pci_cmd & ~PCI_COMMAND_MEMORY); 108 + 109 + _resize_bar(i915, LMEM_BAR_NUM, rebar_size); 110 + 111 + pci_assign_unassigned_bus_resources(pdev->bus); 112 + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); 113 + } 114 + 18 115 static int 19 116 region_lmem_release(struct intel_memory_region *mem) 20 117 { ··· 209 112 flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); 210 113 flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; 211 114 212 - /* FIXME: Remove this when we have small-bar enabled */ 213 - if (pci_resource_len(pdev, 2) < lmem_size) { 214 - drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n"); 215 - return ERR_PTR(-EINVAL); 216 - } 217 - 218 115 if (GEM_WARN_ON(lmem_size < flat_ccs_base)) 219 116 return ERR_PTR(-EIO); 220 117 ··· 224 133 /* Stolen starts from GSMBASE without CCS */ 225 134 lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); 226 135 } 136 + 137 + i915_resize_lmem_bar(i915, lmem_size); 227 138 228 139 if (i915->params.lmem_size > 0) { 229 140 lmem_size = min_t(resource_size_t, lmem_size, ··· 262 169 &mem->io_size); 263 170 drm_info(&i915->drm, "Local memory available: %pa\n", 264 171 &lmem_size); 
172 + 173 + if (io_size < lmem_size) 174 + drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n", 175 + (u64)io_size >> 20); 265 176 266 177 return mem; 267 178
+28 -9
drivers/gpu/drm/i915/gt/intel_reset.c
··· 300 300 return err; 301 301 } 302 302 303 - static int gen6_reset_engines(struct intel_gt *gt, 304 - intel_engine_mask_t engine_mask, 305 - unsigned int retry) 303 + static int __gen6_reset_engines(struct intel_gt *gt, 304 + intel_engine_mask_t engine_mask, 305 + unsigned int retry) 306 306 { 307 307 struct intel_engine_cs *engine; 308 308 u32 hw_mask; ··· 319 319 } 320 320 321 321 return gen6_hw_domain_reset(gt, hw_mask); 322 + } 323 + 324 + static int gen6_reset_engines(struct intel_gt *gt, 325 + intel_engine_mask_t engine_mask, 326 + unsigned int retry) 327 + { 328 + unsigned long flags; 329 + int ret; 330 + 331 + spin_lock_irqsave(&gt->uncore->lock, flags); 332 + ret = __gen6_reset_engines(gt, engine_mask, retry); 333 + spin_unlock_irqrestore(&gt->uncore->lock, flags); 334 + 335 + return ret; 322 336 } 323 337 324 338 static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) ··· 501 487 rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); 502 488 } 503 489 504 - static int gen11_reset_engines(struct intel_gt *gt, 505 - intel_engine_mask_t engine_mask, 506 - unsigned int retry) 490 + static int __gen11_reset_engines(struct intel_gt *gt, 491 + intel_engine_mask_t engine_mask, 492 + unsigned int retry) 507 493 { 508 494 struct intel_engine_cs *engine; 509 495 intel_engine_mask_t tmp; ··· 597 583 struct intel_engine_cs *engine; 598 584 const bool reset_non_ready = retry >= 1; 599 585 intel_engine_mask_t tmp; 586 + unsigned long flags; 600 587 int ret; 588 + 589 + spin_lock_irqsave(&gt->uncore->lock, flags); 601 590 602 591 for_each_engine_masked(engine, gt, engine_mask, tmp) { 603 592 ret = gen8_engine_reset_prepare(engine); ··· 629 612 * This is best effort, so ignore any error from the initial reset. 
630 613 */ 631 614 if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) 632 - gen11_reset_engines(gt, gt->info.engine_mask, 0); 615 + __gen11_reset_engines(gt, gt->info.engine_mask, 0); 633 616 634 617 if (GRAPHICS_VER(gt->i915) >= 11) 635 - ret = gen11_reset_engines(gt, engine_mask, retry); 618 + ret = __gen11_reset_engines(gt, engine_mask, retry); 636 619 else 637 - ret = gen6_reset_engines(gt, engine_mask, retry); 620 + ret = __gen6_reset_engines(gt, engine_mask, retry); 638 621 639 622 skip_reset: 640 623 for_each_engine_masked(engine, gt, engine_mask, tmp) 641 624 gen8_engine_reset_cancel(engine); 625 + 626 + spin_unlock_irqrestore(&gt->uncore->lock, flags); 642 627 643 628 return ret; 644 629 }
+6
drivers/gpu/drm/i915/gt/intel_workarounds.c
··· 689 689 if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || 690 690 IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) 691 691 wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); 692 + 693 + /* Wa_15010599737:dg2 */ 694 + wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); 692 695 } 693 696 694 697 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, ··· 2690 2687 * performance guide section. 2691 2688 */ 2692 2689 wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); 2690 + 2691 + /* Wa_16016694945 */ 2692 + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); 2693 2693 } 2694 2694 2695 2695 if (IS_XEHPSDV(i915)) {
+4 -4
drivers/gpu/drm/i915/gt/selftest_lrc.c
··· 176 176 continue; 177 177 178 178 hw = shmem_pin_map(engine->default_state); 179 - if (IS_ERR(hw)) { 180 - err = PTR_ERR(hw); 179 + if (!hw) { 180 + err = -ENOMEM; 181 181 break; 182 182 } 183 183 hw += LRC_STATE_OFFSET / sizeof(*hw); ··· 365 365 continue; 366 366 367 367 hw = shmem_pin_map(engine->default_state); 368 - if (IS_ERR(hw)) { 369 - err = PTR_ERR(hw); 368 + if (!hw) { 369 + err = -ENOMEM; 370 370 break; 371 371 } 372 372 hw += LRC_STATE_OFFSET / sizeof(*hw);
+170 -183
drivers/gpu/drm/i915/gt/selftest_slpc.c
··· 8 8 #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) 9 9 #define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ 10 10 GEN9_FREQ_SCALER) 11 + enum test_type { 12 + VARY_MIN, 13 + VARY_MAX, 14 + MAX_GRANTED 15 + }; 11 16 12 17 static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) 13 18 { ··· 41 36 return ret; 42 37 } 43 38 44 - static int live_slpc_clamp_min(void *arg) 39 + static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, 40 + u32 *max_act_freq) 45 41 { 46 - struct drm_i915_private *i915 = arg; 47 - struct intel_gt *gt = to_gt(i915); 42 + u32 step, max_freq, req_freq; 43 + u32 act_freq; 44 + int err = 0; 45 + 46 + /* Go from max to min in 5 steps */ 47 + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; 48 + *max_act_freq = slpc->min_freq; 49 + for (max_freq = slpc->rp0_freq; max_freq > slpc->min_freq; 50 + max_freq -= step) { 51 + err = slpc_set_max_freq(slpc, max_freq); 52 + if (err) 53 + break; 54 + 55 + req_freq = intel_rps_read_punit_req_frequency(rps); 56 + 57 + /* GuC requests freq in multiples of 50/3 MHz */ 58 + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { 59 + pr_err("SWReq is %d, should be at most %d\n", req_freq, 60 + max_freq + FREQUENCY_REQ_UNIT); 61 + err = -EINVAL; 62 + } 63 + 64 + act_freq = intel_rps_read_actual_frequency(rps); 65 + if (act_freq > *max_act_freq) 66 + *max_act_freq = act_freq; 67 + 68 + if (err) 69 + break; 70 + } 71 + 72 + return err; 73 + } 74 + 75 + static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, 76 + u32 *max_act_freq) 77 + { 78 + u32 step, min_freq, req_freq; 79 + u32 act_freq; 80 + int err = 0; 81 + 82 + /* Go from min to max in 5 steps */ 83 + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; 84 + *max_act_freq = slpc->min_freq; 85 + for (min_freq = slpc->min_freq; min_freq < slpc->rp0_freq; 86 + min_freq += step) { 87 + err = slpc_set_min_freq(slpc, min_freq); 88 + if (err) 89 + break; 90 + 91 + req_freq 
= intel_rps_read_punit_req_frequency(rps); 92 + 93 + /* GuC requests freq in multiples of 50/3 MHz */ 94 + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { 95 + pr_err("SWReq is %d, should be at least %d\n", req_freq, 96 + min_freq - FREQUENCY_REQ_UNIT); 97 + err = -EINVAL; 98 + } 99 + 100 + act_freq = intel_rps_read_actual_frequency(rps); 101 + if (act_freq > *max_act_freq) 102 + *max_act_freq = act_freq; 103 + 104 + if (err) 105 + break; 106 + } 107 + 108 + return err; 109 + } 110 + 111 + static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) 112 + { 113 + struct intel_gt *gt = rps_to_gt(rps); 114 + u32 perf_limit_reasons; 115 + int err = 0; 116 + 117 + err = slpc_set_min_freq(slpc, slpc->rp0_freq); 118 + if (err) 119 + return err; 120 + 121 + *max_act_freq = intel_rps_read_actual_frequency(rps); 122 + if (*max_act_freq != slpc->rp0_freq) { 123 + /* Check if there was some throttling by pcode */ 124 + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); 125 + 126 + /* If not, this is an error */ 127 + if (!(perf_limit_reasons & GT0_PERF_LIMIT_REASONS_MASK)) { 128 + pr_err("Pcode did not grant max freq\n"); 129 + err = -EINVAL; 130 + } else { 131 + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); 132 + } 133 + } 134 + 135 + return err; 136 + } 137 + 138 + static int run_test(struct intel_gt *gt, int test_type) 139 + { 48 140 struct intel_guc_slpc *slpc = &gt->uc.guc.slpc; 49 141 struct intel_rps *rps = &gt->rps; 50 142 struct intel_engine_cs *engine; ··· 166 64 return -EIO; 167 65 } 168 66 169 - if (slpc_min_freq == slpc_max_freq) { 67 + if (slpc->min_freq == slpc->rp0_freq) { 170 68 pr_err("Min/Max are fused to the same value\n"); 171 69 return -EINVAL; 172 70 } ··· 175 73 intel_gt_pm_get(gt); 176 74 for_each_engine(engine, gt, id) { 177 75 struct i915_request *rq; 178 - u32 step, min_freq, req_freq; 179 - u32 act_freq, max_act_freq; 76 + u32 max_act_freq; 180 77 181 78 if 
(!intel_engine_can_store_dword(engine)) 182 79 continue; 183 80 184 - /* Go from min to max in 5 steps */ 185 - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; 186 - max_act_freq = slpc_min_freq; 187 - for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; 188 - min_freq += step) { 189 - err = slpc_set_min_freq(slpc, min_freq); 190 - if (err) 191 - break; 81 + st_engine_heartbeat_disable(engine); 192 82 193 - st_engine_heartbeat_disable(engine); 83 + rq = igt_spinner_create_request(&spin, 84 + engine->kernel_context, 85 + MI_NOOP); 86 + if (IS_ERR(rq)) { 87 + err = PTR_ERR(rq); 88 + st_engine_heartbeat_enable(engine); 89 + break; 90 + } 194 91 195 - rq = igt_spinner_create_request(&spin, 196 - engine->kernel_context, 197 - MI_NOOP); 198 - if (IS_ERR(rq)) { 199 - err = PTR_ERR(rq); 200 - st_engine_heartbeat_enable(engine); 201 - break; 202 - } 92 + i915_request_add(rq); 203 93 204 - i915_request_add(rq); 205 - 206 - if (!igt_wait_for_spinner(&spin, rq)) { 207 - pr_err("%s: Spinner did not start\n", 208 - engine->name); 209 - igt_spinner_end(&spin); 210 - st_engine_heartbeat_enable(engine); 211 - intel_gt_set_wedged(engine->gt); 212 - err = -EIO; 213 - break; 214 - } 215 - 216 - /* Wait for GuC to detect business and raise 217 - * requested frequency if necessary. 
218 - */ 219 - delay_for_h2g(); 220 - 221 - req_freq = intel_rps_read_punit_req_frequency(rps); 222 - 223 - /* GuC requests freq in multiples of 50/3 MHz */ 224 - if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { 225 - pr_err("SWReq is %d, should be at least %d\n", req_freq, 226 - min_freq - FREQUENCY_REQ_UNIT); 227 - igt_spinner_end(&spin); 228 - st_engine_heartbeat_enable(engine); 229 - err = -EINVAL; 230 - break; 231 - } 232 - 233 - act_freq = intel_rps_read_actual_frequency(rps); 234 - if (act_freq > max_act_freq) 235 - max_act_freq = act_freq; 236 - 94 + if (!igt_wait_for_spinner(&spin, rq)) { 95 + pr_err("%s: Spinner did not start\n", 96 + engine->name); 237 97 igt_spinner_end(&spin); 238 98 st_engine_heartbeat_enable(engine); 99 + intel_gt_set_wedged(engine->gt); 100 + err = -EIO; 101 + break; 102 + } 103 + 104 + switch (test_type) { 105 + case VARY_MIN: 106 + err = vary_min_freq(slpc, rps, &max_act_freq); 107 + break; 108 + 109 + case VARY_MAX: 110 + err = vary_max_freq(slpc, rps, &max_act_freq); 111 + break; 112 + 113 + case MAX_GRANTED: 114 + /* Media engines have a different RP0 */ 115 + if (engine->class == VIDEO_DECODE_CLASS || 116 + engine->class == VIDEO_ENHANCEMENT_CLASS) { 117 + igt_spinner_end(&spin); 118 + st_engine_heartbeat_enable(engine); 119 + err = 0; 120 + continue; 121 + } 122 + 123 + err = max_granted_freq(slpc, rps, &max_act_freq); 124 + break; 239 125 } 240 126 241 127 pr_info("Max actual frequency for %s was %d\n", 242 128 engine->name, max_act_freq); 243 129 244 130 /* Actual frequency should rise above min */ 245 - if (max_act_freq == slpc_min_freq) { 131 + if (max_act_freq <= slpc_min_freq) { 246 132 pr_err("Actual freq did not rise above min\n"); 133 + pr_err("Perf Limit Reasons: 0x%x\n", 134 + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); 247 135 err = -EINVAL; 248 136 } 137 + 138 + igt_spinner_end(&spin); 139 + st_engine_heartbeat_enable(engine); 249 140 250 141 if (err) 251 142 break; ··· 258 163 return err; 259 164 } 
260 165 261 - static int live_slpc_clamp_max(void *arg) 166 + static int live_slpc_vary_min(void *arg) 262 167 { 263 168 struct drm_i915_private *i915 = arg; 264 169 struct intel_gt *gt = to_gt(i915); 265 - struct intel_guc_slpc *slpc; 266 - struct intel_rps *rps; 267 - struct intel_engine_cs *engine; 268 - enum intel_engine_id id; 269 - struct igt_spinner spin; 270 - int err = 0; 271 - u32 slpc_min_freq, slpc_max_freq; 272 170 273 - slpc = &gt->uc.guc.slpc; 274 - rps = &gt->rps; 171 + return run_test(gt, VARY_MIN); 172 + } 275 173 276 - if (!intel_uc_uses_guc_slpc(&gt->uc)) 277 - return 0; 174 + static int live_slpc_vary_max(void *arg) 175 + { 176 + struct drm_i915_private *i915 = arg; 177 + struct intel_gt *gt = to_gt(i915); 278 178 279 - if (igt_spinner_init(&spin, gt)) 280 - return -ENOMEM; 179 + return run_test(gt, VARY_MAX); 180 + } 281 181 282 - if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { 283 - pr_err("Could not get SLPC max freq\n"); 284 - return -EIO; 285 - } 182 + /* check if pcode can grant RP0 */ 183 + static int live_slpc_max_granted(void *arg) 184 + { 185 + struct drm_i915_private *i915 = arg; 186 + struct intel_gt *gt = to_gt(i915); 286 187 287 - if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { 288 - pr_err("Could not get SLPC min freq\n"); 289 - return -EIO; 290 - } 291 - 292 - if (slpc_min_freq == slpc_max_freq) { 293 - pr_err("Min/Max are fused to the same value\n"); 294 - return -EINVAL; 295 - } 296 - 297 - intel_gt_pm_wait_for_idle(gt); 298 - intel_gt_pm_get(gt); 299 - for_each_engine(engine, gt, id) { 300 - struct i915_request *rq; 301 - u32 max_freq, req_freq; 302 - u32 act_freq, max_act_freq; 303 - u32 step; 304 - 305 - if (!intel_engine_can_store_dword(engine)) 306 - continue; 307 - 308 - /* Go from max to min in 5 steps */ 309 - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; 310 - max_act_freq = slpc_min_freq; 311 - for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; 312 - max_freq -= step) { 313 - err = 
slpc_set_max_freq(slpc, max_freq); 314 - if (err) 315 - break; 316 - 317 - st_engine_heartbeat_disable(engine); 318 - 319 - rq = igt_spinner_create_request(&spin, 320 - engine->kernel_context, 321 - MI_NOOP); 322 - if (IS_ERR(rq)) { 323 - st_engine_heartbeat_enable(engine); 324 - err = PTR_ERR(rq); 325 - break; 326 - } 327 - 328 - i915_request_add(rq); 329 - 330 - if (!igt_wait_for_spinner(&spin, rq)) { 331 - pr_err("%s: SLPC spinner did not start\n", 332 - engine->name); 333 - igt_spinner_end(&spin); 334 - st_engine_heartbeat_enable(engine); 335 - intel_gt_set_wedged(engine->gt); 336 - err = -EIO; 337 - break; 338 - } 339 - 340 - delay_for_h2g(); 341 - 342 - /* Verify that SWREQ indeed was set to specific value */ 343 - req_freq = intel_rps_read_punit_req_frequency(rps); 344 - 345 - /* GuC requests freq in multiples of 50/3 MHz */ 346 - if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { 347 - pr_err("SWReq is %d, should be at most %d\n", req_freq, 348 - max_freq + FREQUENCY_REQ_UNIT); 349 - igt_spinner_end(&spin); 350 - st_engine_heartbeat_enable(engine); 351 - err = -EINVAL; 352 - break; 353 - } 354 - 355 - act_freq = intel_rps_read_actual_frequency(rps); 356 - if (act_freq > max_act_freq) 357 - max_act_freq = act_freq; 358 - 359 - st_engine_heartbeat_enable(engine); 360 - igt_spinner_end(&spin); 361 - 362 - if (err) 363 - break; 364 - } 365 - 366 - pr_info("Max actual frequency for %s was %d\n", 367 - engine->name, max_act_freq); 368 - 369 - /* Actual frequency should rise above min */ 370 - if (max_act_freq == slpc_min_freq) { 371 - pr_err("Actual freq did not rise above min\n"); 372 - err = -EINVAL; 373 - } 374 - 375 - if (igt_flush_test(gt->i915)) { 376 - err = -EIO; 377 - break; 378 - } 379 - 380 - if (err) 381 - break; 382 - } 383 - 384 - /* Restore min/max freq */ 385 - slpc_set_max_freq(slpc, slpc_max_freq); 386 - slpc_set_min_freq(slpc, slpc_min_freq); 387 - 388 - intel_gt_pm_put(gt); 389 - igt_spinner_fini(&spin); 390 - intel_gt_pm_wait_for_idle(gt); 391 
- 392 - return err; 188 + return run_test(gt, MAX_GRANTED); 393 189 } 394 190 395 191 int intel_slpc_live_selftests(struct drm_i915_private *i915) 396 192 { 397 193 static const struct i915_subtest tests[] = { 398 - SUBTEST(live_slpc_clamp_max), 399 - SUBTEST(live_slpc_clamp_min), 194 + SUBTEST(live_slpc_vary_max), 195 + SUBTEST(live_slpc_vary_min), 196 + SUBTEST(live_slpc_max_granted), 400 197 }; 401 198 402 199 if (intel_gt_is_wedged(to_gt(i915)))
+8 -8
drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
··· 9 9 10 10 #include "gt/intel_engine_regs.h" 11 11 #include "gt/intel_gt.h" 12 + #include "gt/intel_gt_mcr.h" 12 13 #include "gt/intel_gt_regs.h" 13 14 #include "gt/intel_lrc.h" 14 15 #include "guc_capture_fwif.h" ··· 282 281 const struct __guc_mmio_reg_descr_group *lists) 283 282 { 284 283 struct intel_gt *gt = guc_to_gt(guc); 285 - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 286 - int slice, subslice, i, num_steer_regs, num_tot_regs = 0; 284 + int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0; 287 285 const struct __guc_mmio_reg_descr_group *list; 288 286 struct __guc_mmio_reg_descr_group *extlists; 289 287 struct __guc_mmio_reg_descr *extarray; ··· 298 298 num_steer_regs = ARRAY_SIZE(xe_extregs); 299 299 300 300 sseu = &gt->info.sseu; 301 - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) 301 + for_each_ss_steering(iter, gt, slice, subslice) 302 302 num_tot_regs += num_steer_regs; 303 303 304 304 if (!num_tot_regs) ··· 315 315 } 316 316 317 317 extarray = extlists[0].extlist; 318 - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { 318 + for_each_ss_steering(iter, gt, slice, subslice) { 319 319 for (i = 0; i < num_steer_regs; ++i) { 320 320 __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); 321 321 ++extarray; ··· 359 359 num_steer_regs += ARRAY_SIZE(xehpg_extregs); 360 360 361 361 sseu = &gt->info.sseu; 362 - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { 362 + for_each_ss_steering(iter, gt, slice, subslice) 363 363 num_tot_regs += num_steer_regs; 364 - } 365 364 366 365 if (!num_tot_regs) 367 366 return; ··· 376 377 } 377 378 378 379 extarray = extlists[0].extlist; 379 - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { 380 + for_each_ss_steering(iter, gt, slice, subslice) { 380 381 for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { 381 382 __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); 382 383 ++extarray; ··· 1260 1261 GUC_CAPTURE_LOG_BUFFER 1261 
1262 }; 1262 1263 1263 - return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1264 + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); 1265 + 1264 1266 } 1265 1267 1266 1268 static void __guc_capture_process_output(struct intel_guc *guc)
+1 -1
drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
··· 31 31 GUC_DEBUG_LOG_BUFFER 32 32 }; 33 33 34 - return intel_guc_send(guc, action, ARRAY_SIZE(action)); 34 + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); 35 35 } 36 36 37 37 static int guc_action_flush_log(struct intel_guc *guc)
+9
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
··· 162 162 u8 rev = INTEL_REVID(i915); 163 163 int i; 164 164 165 + /* 166 + * The only difference between the ADL GuC FWs is the HWConfig support. 167 + * ADL-N does not support HWConfig, so we should use the same binary as 168 + * ADL-S, otherwise the GuC might attempt to fetch a config table that 169 + * does not exist. 170 + */ 171 + if (IS_ADLP_N(i915)) 172 + p = INTEL_ALDERLAKE_S; 173 + 165 174 GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); 166 175 fw_blobs = blobs_all[uc_fw->type].blobs; 167 176 fw_count = blobs_all[uc_fw->type].count;
+1 -1
drivers/gpu/drm/i915/i915_active.c
··· 974 974 975 975 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 976 976 llist_add(barrier_to_ll(node), &engine->barrier_tasks); 977 - intel_engine_pm_put_delay(engine, 1); 977 + intel_engine_pm_put_delay(engine, 2); 978 978 } 979 979 } 980 980
+5
drivers/gpu/drm/i915/i915_drv.h
··· 1005 1005 #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) 1006 1006 #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) 1007 1007 #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) 1008 + #define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_METEORLAKE) 1008 1009 1010 + #define IS_METEORLAKE_M(dev_priv) \ 1011 + IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_M) 1012 + #define IS_METEORLAKE_P(dev_priv) \ 1013 + IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_P) 1009 1014 #define IS_DG2_G10(dev_priv) \ 1010 1015 IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) 1011 1016 #define IS_DG2_G11(dev_priv) \
+17 -25
drivers/gpu/drm/i915/i915_gpu_error.c
··· 46 46 #include "gem/i915_gem_lmem.h" 47 47 #include "gt/intel_engine_regs.h" 48 48 #include "gt/intel_gt.h" 49 + #include "gt/intel_gt_mcr.h" 49 50 #include "gt/intel_gt_pm.h" 50 51 #include "gt/intel_gt_regs.h" 51 52 #include "gt/uc/intel_guc_capture.h" ··· 437 436 static void error_print_instdone(struct drm_i915_error_state_buf *m, 438 437 const struct intel_engine_coredump *ee) 439 438 { 440 - const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; 441 439 int slice; 442 440 int subslice; 443 441 int iter; ··· 453 453 if (GRAPHICS_VER(m->i915) <= 6) 454 454 return; 455 455 456 - if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { 457 - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) 458 - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", 459 - slice, subslice, 460 - ee->instdone.sampler[slice][subslice]); 456 + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) 457 + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", 458 + slice, subslice, 459 + ee->instdone.sampler[slice][subslice]); 461 460 462 - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) 463 - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", 464 - slice, subslice, 465 - ee->instdone.row[slice][subslice]); 466 - } else { 467 - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) 468 - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", 469 - slice, subslice, 470 - ee->instdone.sampler[slice][subslice]); 471 - 472 - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) 473 - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", 474 - slice, subslice, 475 - ee->instdone.row[slice][subslice]); 476 - } 461 + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) 462 + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", 463 + slice, subslice, 464 + ee->instdone.row[slice][subslice]); 477 465 478 466 if (GRAPHICS_VER(m->i915) < 12) 479 467 return; 480 468 481 469 if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { 482 
- for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) 470 + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) 483 471 err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", 484 472 slice, subslice, 485 473 ee->instdone.geom_svg[slice][subslice]); ··· 1117 1129 dma_addr_t dma; 1118 1130 1119 1131 for_each_sgt_daddr(dma, iter, vma_res->bi.pages) { 1132 + dma_addr_t offset = dma - mem->region.start; 1120 1133 void __iomem *s; 1121 1134 1122 - s = io_mapping_map_wc(&mem->iomap, 1123 - dma - mem->region.start, 1124 - PAGE_SIZE); 1135 + if (offset + PAGE_SIZE > mem->io_size) { 1136 + ret = -EINVAL; 1137 + break; 1138 + } 1139 + 1140 + s = io_mapping_map_wc(&mem->iomap, offset, PAGE_SIZE); 1125 1141 ret = compress_page(compress, 1126 1142 (void __force *)s, dst, 1127 1143 true);
+2
drivers/gpu/drm/i915/i915_params.c
··· 204 204 205 205 i915_param_named_unsafe(lmem_size, uint, 0400, 206 206 "Set the lmem size(in MiB) for each region. (default: 0, all memory)"); 207 + i915_param_named_unsafe(lmem_bar_size, uint, 0400, 208 + "Set the lmem bar size(in MiB)."); 207 209 208 210 static __always_inline void _print_param(struct drm_printer *p, 209 211 const char *name,
+1
drivers/gpu/drm/i915/i915_params.h
··· 74 74 param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ 75 75 param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ 76 76 param(unsigned int, lmem_size, 0, 0400) \ 77 + param(unsigned int, lmem_bar_size, 0, 0400) \ 77 78 /* leave bools at the end to not create holes */ \ 78 79 param(bool, enable_hangcheck, true, 0600) \ 79 80 param(bool, load_detect_test, false, 0600) \
+27 -1
drivers/gpu/drm/i915/i915_pci.c
··· 1075 1075 .require_force_probe = 1, 1076 1076 }; 1077 1077 1078 - __maybe_unused 1079 1078 static const struct intel_device_info ats_m_info = { 1080 1079 DG2_FEATURES, 1081 1080 .display = { 0 }, ··· 1104 1105 BIT(BCS0) | 1105 1106 BIT(VCS0) | 1106 1107 BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), 1108 + .require_force_probe = 1, 1109 + }; 1110 + 1111 + #define XE_LPDP_FEATURES \ 1112 + XE_LPD_FEATURES, \ 1113 + .display.ver = 14, \ 1114 + .display.has_cdclk_crawl = 1 1115 + 1116 + __maybe_unused 1117 + static const struct intel_device_info mtl_info = { 1118 + XE_HP_FEATURES, 1119 + XE_LPDP_FEATURES, 1120 + /* 1121 + * Real graphics IP version will be obtained from hardware GMD_ID 1122 + * register. Value provided here is just for sanity checking. 1123 + */ 1124 + .graphics.ver = 12, 1125 + .graphics.rel = 70, 1126 + .media.ver = 13, 1127 + PLATFORM(INTEL_METEORLAKE), 1128 + .display.has_modular_fia = 1, 1129 + .has_flat_ccs = 0, 1130 + .has_snoop = 1, 1131 + .memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, 1132 + .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), 1107 1133 .require_force_probe = 1, 1108 1134 }; 1109 1135 ··· 1213 1189 INTEL_RPLS_IDS(&adl_s_info), 1214 1190 INTEL_RPLP_IDS(&adl_p_info), 1215 1191 INTEL_DG2_IDS(&dg2_info), 1192 + INTEL_ATS_M_IDS(&ats_m_info), 1193 + INTEL_MTL_IDS(&mtl_info), 1216 1194 {0, 0, 0} 1217 1195 }; 1218 1196 MODULE_DEVICE_TABLE(pci, pciidlist);
+107 -51
drivers/gpu/drm/i915/i915_perf.c
··· 885 885 if (ret) 886 886 return ret; 887 887 888 - DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 889 - stream->period_exponent); 888 + drm_dbg(&stream->perf->i915->drm, 889 + "OA buffer overflow (exponent = %d): force restart\n", 890 + stream->period_exponent); 890 891 891 892 stream->perf->ops.oa_disable(stream); 892 893 stream->perf->ops.oa_enable(stream); ··· 1109 1108 if (ret) 1110 1109 return ret; 1111 1110 1112 - DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 1113 - stream->period_exponent); 1111 + drm_dbg(&stream->perf->i915->drm, 1112 + "OA buffer overflow (exponent = %d): force restart\n", 1113 + stream->period_exponent); 1114 1114 1115 1115 stream->perf->ops.oa_disable(stream); 1116 1116 stream->perf->ops.oa_enable(stream); ··· 2865 2863 int ret; 2866 2864 2867 2865 if (!props->engine) { 2868 - DRM_DEBUG("OA engine not specified\n"); 2866 + drm_dbg(&stream->perf->i915->drm, 2867 + "OA engine not specified\n"); 2869 2868 return -EINVAL; 2870 2869 } 2871 2870 ··· 2876 2873 * IDs 2877 2874 */ 2878 2875 if (!perf->metrics_kobj) { 2879 - DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2876 + drm_dbg(&stream->perf->i915->drm, 2877 + "OA metrics weren't advertised via sysfs\n"); 2880 2878 return -EINVAL; 2881 2879 } 2882 2880 2883 2881 if (!(props->sample_flags & SAMPLE_OA_REPORT) && 2884 2882 (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) { 2885 - DRM_DEBUG("Only OA report sampling supported\n"); 2883 + drm_dbg(&stream->perf->i915->drm, 2884 + "Only OA report sampling supported\n"); 2886 2885 return -EINVAL; 2887 2886 } 2888 2887 2889 2888 if (!perf->ops.enable_metric_set) { 2890 - DRM_DEBUG("OA unit not supported\n"); 2889 + drm_dbg(&stream->perf->i915->drm, 2890 + "OA unit not supported\n"); 2891 2891 return -ENODEV; 2892 2892 } 2893 2893 ··· 2900 2894 * we currently only allow exclusive access 2901 2895 */ 2902 2896 if (perf->exclusive_stream) { 2903 - DRM_DEBUG("OA unit already in use\n"); 2897 + 
drm_dbg(&stream->perf->i915->drm, 2898 + "OA unit already in use\n"); 2904 2899 return -EBUSY; 2905 2900 } 2906 2901 2907 2902 if (!props->oa_format) { 2908 - DRM_DEBUG("OA report format not specified\n"); 2903 + drm_dbg(&stream->perf->i915->drm, 2904 + "OA report format not specified\n"); 2909 2905 return -EINVAL; 2910 2906 } 2911 2907 ··· 2937 2929 if (stream->ctx) { 2938 2930 ret = oa_get_render_ctx_id(stream); 2939 2931 if (ret) { 2940 - DRM_DEBUG("Invalid context id to filter with\n"); 2932 + drm_dbg(&stream->perf->i915->drm, 2933 + "Invalid context id to filter with\n"); 2941 2934 return ret; 2942 2935 } 2943 2936 } 2944 2937 2945 2938 ret = alloc_noa_wait(stream); 2946 2939 if (ret) { 2947 - DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); 2940 + drm_dbg(&stream->perf->i915->drm, 2941 + "Unable to allocate NOA wait batch buffer\n"); 2948 2942 goto err_noa_wait_alloc; 2949 2943 } 2950 2944 2951 2945 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); 2952 2946 if (!stream->oa_config) { 2953 - DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); 2947 + drm_dbg(&stream->perf->i915->drm, 2948 + "Invalid OA config id=%i\n", props->metrics_set); 2954 2949 ret = -EINVAL; 2955 2950 goto err_config; 2956 2951 } ··· 2984 2973 2985 2974 ret = i915_perf_stream_enable_sync(stream); 2986 2975 if (ret) { 2987 - DRM_DEBUG("Unable to enable metric set\n"); 2976 + drm_dbg(&stream->perf->i915->drm, 2977 + "Unable to enable metric set\n"); 2988 2978 goto err_enable; 2989 2979 } 2990 2980 2991 - DRM_DEBUG("opening stream oa config uuid=%s\n", 2981 + drm_dbg(&stream->perf->i915->drm, 2982 + "opening stream oa config uuid=%s\n", 2992 2983 stream->oa_config->uuid); 2993 2984 2994 2985 hrtimer_init(&stream->poll_check_timer, ··· 3442 3429 3443 3430 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 3444 3431 if (IS_ERR(specific_ctx)) { 3445 - DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 3432 + 
drm_dbg(&perf->i915->drm, 3433 + "Failed to look up context with ID %u for opening perf stream\n", 3446 3434 ctx_handle); 3447 3435 ret = PTR_ERR(specific_ctx); 3448 3436 goto err; ··· 3477 3463 3478 3464 if (props->hold_preemption) { 3479 3465 if (!props->single_context) { 3480 - DRM_DEBUG("preemption disable with no context\n"); 3466 + drm_dbg(&perf->i915->drm, 3467 + "preemption disable with no context\n"); 3481 3468 ret = -EINVAL; 3482 3469 goto err; 3483 3470 } ··· 3500 3485 */ 3501 3486 if (privileged_op && 3502 3487 i915_perf_stream_paranoid && !perfmon_capable()) { 3503 - DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); 3488 + drm_dbg(&perf->i915->drm, 3489 + "Insufficient privileges to open i915 perf stream\n"); 3504 3490 ret = -EACCES; 3505 3491 goto err_ctx; 3506 3492 } ··· 3608 3592 props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; 3609 3593 3610 3594 if (!n_props) { 3611 - DRM_DEBUG("No i915 perf properties given\n"); 3595 + drm_dbg(&perf->i915->drm, 3596 + "No i915 perf properties given\n"); 3612 3597 return -EINVAL; 3613 3598 } 3614 3599 ··· 3618 3601 I915_ENGINE_CLASS_RENDER, 3619 3602 0); 3620 3603 if (!props->engine) { 3621 - DRM_DEBUG("No RENDER-capable engines\n"); 3604 + drm_dbg(&perf->i915->drm, 3605 + "No RENDER-capable engines\n"); 3622 3606 return -EINVAL; 3623 3607 } 3624 3608 ··· 3630 3612 * from userspace. 
3631 3613 */ 3632 3614 if (n_props >= DRM_I915_PERF_PROP_MAX) { 3633 - DRM_DEBUG("More i915 perf properties specified than exist\n"); 3615 + drm_dbg(&perf->i915->drm, 3616 + "More i915 perf properties specified than exist\n"); 3634 3617 return -EINVAL; 3635 3618 } 3636 3619 ··· 3648 3629 return ret; 3649 3630 3650 3631 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 3651 - DRM_DEBUG("Unknown i915 perf property ID\n"); 3632 + drm_dbg(&perf->i915->drm, 3633 + "Unknown i915 perf property ID\n"); 3652 3634 return -EINVAL; 3653 3635 } 3654 3636 ··· 3664 3644 break; 3665 3645 case DRM_I915_PERF_PROP_OA_METRICS_SET: 3666 3646 if (value == 0) { 3667 - DRM_DEBUG("Unknown OA metric set ID\n"); 3647 + drm_dbg(&perf->i915->drm, 3648 + "Unknown OA metric set ID\n"); 3668 3649 return -EINVAL; 3669 3650 } 3670 3651 props->metrics_set = value; 3671 3652 break; 3672 3653 case DRM_I915_PERF_PROP_OA_FORMAT: 3673 3654 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 3674 - DRM_DEBUG("Out-of-range OA report format %llu\n", 3655 + drm_dbg(&perf->i915->drm, 3656 + "Out-of-range OA report format %llu\n", 3675 3657 value); 3676 3658 return -EINVAL; 3677 3659 } 3678 3660 if (!oa_format_valid(perf, value)) { 3679 - DRM_DEBUG("Unsupported OA report format %llu\n", 3661 + drm_dbg(&perf->i915->drm, 3662 + "Unsupported OA report format %llu\n", 3680 3663 value); 3681 3664 return -EINVAL; 3682 3665 } ··· 3687 3664 break; 3688 3665 case DRM_I915_PERF_PROP_OA_EXPONENT: 3689 3666 if (value > OA_EXPONENT_MAX) { 3690 - DRM_DEBUG("OA timer exponent too high (> %u)\n", 3667 + drm_dbg(&perf->i915->drm, 3668 + "OA timer exponent too high (> %u)\n", 3691 3669 OA_EXPONENT_MAX); 3692 3670 return -EINVAL; 3693 3671 } ··· 3716 3692 oa_freq_hz = 0; 3717 3693 3718 3694 if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) { 3719 - DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", 3695 + 
drm_dbg(&perf->i915->drm, 3696 + "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", 3720 3697 i915_oa_max_sample_rate); 3721 3698 return -EACCES; 3722 3699 } ··· 3731 3706 case DRM_I915_PERF_PROP_GLOBAL_SSEU: { 3732 3707 struct drm_i915_gem_context_param_sseu user_sseu; 3733 3708 3709 + if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) { 3710 + drm_dbg(&perf->i915->drm, 3711 + "SSEU config not supported on gfx %x\n", 3712 + GRAPHICS_VER_FULL(perf->i915)); 3713 + return -ENODEV; 3714 + } 3715 + 3734 3716 if (copy_from_user(&user_sseu, 3735 3717 u64_to_user_ptr(value), 3736 3718 sizeof(user_sseu))) { 3737 - DRM_DEBUG("Unable to copy global sseu parameter\n"); 3719 + drm_dbg(&perf->i915->drm, 3720 + "Unable to copy global sseu parameter\n"); 3738 3721 return -EFAULT; 3739 3722 } 3740 3723 3741 3724 ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); 3742 3725 if (ret) { 3743 - DRM_DEBUG("Invalid SSEU configuration\n"); 3726 + drm_dbg(&perf->i915->drm, 3727 + "Invalid SSEU configuration\n"); 3744 3728 return ret; 3745 3729 } 3746 3730 props->has_sseu = true; ··· 3757 3723 } 3758 3724 case DRM_I915_PERF_PROP_POLL_OA_PERIOD: 3759 3725 if (value < 100000 /* 100us */) { 3760 - DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n", 3726 + drm_dbg(&perf->i915->drm, 3727 + "OA availability timer too small (%lluns < 100us)\n", 3761 3728 value); 3762 3729 return -EINVAL; 3763 3730 } ··· 3809 3774 int ret; 3810 3775 3811 3776 if (!perf->i915) { 3812 - DRM_DEBUG("i915 perf interface not available for this system\n"); 3777 + drm_dbg(&perf->i915->drm, 3778 + "i915 perf interface not available for this system\n"); 3813 3779 return -ENOTSUPP; 3814 3780 } 3815 3781 ··· 3818 3782 I915_PERF_FLAG_FD_NONBLOCK | 3819 3783 I915_PERF_FLAG_DISABLED; 3820 3784 if (param->flags & ~known_open_flags) { 3821 - DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 3785 + 
drm_dbg(&perf->i915->drm, 3786 + "Unknown drm_i915_perf_open_param flag\n"); 3822 3787 return -EINVAL; 3823 3788 } 3824 3789 ··· 4065 4028 goto addr_err; 4066 4029 4067 4030 if (!is_valid(perf, addr)) { 4068 - DRM_DEBUG("Invalid oa_reg address: %X\n", addr); 4031 + drm_dbg(&perf->i915->drm, 4032 + "Invalid oa_reg address: %X\n", addr); 4069 4033 err = -EINVAL; 4070 4034 goto addr_err; 4071 4035 } ··· 4140 4102 int err, id; 4141 4103 4142 4104 if (!perf->i915) { 4143 - DRM_DEBUG("i915 perf interface not available for this system\n"); 4105 + drm_dbg(&perf->i915->drm, 4106 + "i915 perf interface not available for this system\n"); 4144 4107 return -ENOTSUPP; 4145 4108 } 4146 4109 4147 4110 if (!perf->metrics_kobj) { 4148 - DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 4111 + drm_dbg(&perf->i915->drm, 4112 + "OA metrics weren't advertised via sysfs\n"); 4149 4113 return -EINVAL; 4150 4114 } 4151 4115 4152 4116 if (i915_perf_stream_paranoid && !perfmon_capable()) { 4153 - DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); 4117 + drm_dbg(&perf->i915->drm, 4118 + "Insufficient privileges to add i915 OA config\n"); 4154 4119 return -EACCES; 4155 4120 } 4156 4121 4157 4122 if ((!args->mux_regs_ptr || !args->n_mux_regs) && 4158 4123 (!args->boolean_regs_ptr || !args->n_boolean_regs) && 4159 4124 (!args->flex_regs_ptr || !args->n_flex_regs)) { 4160 - DRM_DEBUG("No OA registers given\n"); 4125 + drm_dbg(&perf->i915->drm, 4126 + "No OA registers given\n"); 4161 4127 return -EINVAL; 4162 4128 } 4163 4129 4164 4130 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 4165 4131 if (!oa_config) { 4166 - DRM_DEBUG("Failed to allocate memory for the OA config\n"); 4132 + drm_dbg(&perf->i915->drm, 4133 + "Failed to allocate memory for the OA config\n"); 4167 4134 return -ENOMEM; 4168 4135 } 4169 4136 ··· 4176 4133 kref_init(&oa_config->ref); 4177 4134 4178 4135 if (!uuid_is_valid(args->uuid)) { 4179 - DRM_DEBUG("Invalid uuid format for OA config\n"); 4136 + 
drm_dbg(&perf->i915->drm, 4137 + "Invalid uuid format for OA config\n"); 4180 4138 err = -EINVAL; 4181 4139 goto reg_err; 4182 4140 } ··· 4194 4150 args->n_mux_regs); 4195 4151 4196 4152 if (IS_ERR(regs)) { 4197 - DRM_DEBUG("Failed to create OA config for mux_regs\n"); 4153 + drm_dbg(&perf->i915->drm, 4154 + "Failed to create OA config for mux_regs\n"); 4198 4155 err = PTR_ERR(regs); 4199 4156 goto reg_err; 4200 4157 } ··· 4208 4163 args->n_boolean_regs); 4209 4164 4210 4165 if (IS_ERR(regs)) { 4211 - DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); 4166 + drm_dbg(&perf->i915->drm, 4167 + "Failed to create OA config for b_counter_regs\n"); 4212 4168 err = PTR_ERR(regs); 4213 4169 goto reg_err; 4214 4170 } ··· 4228 4182 args->n_flex_regs); 4229 4183 4230 4184 if (IS_ERR(regs)) { 4231 - DRM_DEBUG("Failed to create OA config for flex_regs\n"); 4185 + drm_dbg(&perf->i915->drm, 4186 + "Failed to create OA config for flex_regs\n"); 4232 4187 err = PTR_ERR(regs); 4233 4188 goto reg_err; 4234 4189 } ··· 4245 4198 */ 4246 4199 idr_for_each_entry(&perf->metrics_idr, tmp, id) { 4247 4200 if (!strcmp(tmp->uuid, oa_config->uuid)) { 4248 - DRM_DEBUG("OA config already exists with this uuid\n"); 4201 + drm_dbg(&perf->i915->drm, 4202 + "OA config already exists with this uuid\n"); 4249 4203 err = -EADDRINUSE; 4250 4204 goto sysfs_err; 4251 4205 } ··· 4254 4206 4255 4207 err = create_dynamic_oa_sysfs_entry(perf, oa_config); 4256 4208 if (err) { 4257 - DRM_DEBUG("Failed to create sysfs entry for OA config\n"); 4209 + drm_dbg(&perf->i915->drm, 4210 + "Failed to create sysfs entry for OA config\n"); 4258 4211 goto sysfs_err; 4259 4212 } 4260 4213 ··· 4264 4215 oa_config, 2, 4265 4216 0, GFP_KERNEL); 4266 4217 if (oa_config->id < 0) { 4267 - DRM_DEBUG("Failed to create sysfs entry for OA config\n"); 4218 + drm_dbg(&perf->i915->drm, 4219 + "Failed to create sysfs entry for OA config\n"); 4268 4220 err = oa_config->id; 4269 4221 goto sysfs_err; 4270 4222 } 4271 4223 4272 
4224 mutex_unlock(&perf->metrics_lock); 4273 4225 4274 - DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); 4226 + drm_dbg(&perf->i915->drm, 4227 + "Added config %s id=%i\n", oa_config->uuid, oa_config->id); 4275 4228 4276 4229 return oa_config->id; 4277 4230 ··· 4281 4230 mutex_unlock(&perf->metrics_lock); 4282 4231 reg_err: 4283 4232 i915_oa_config_put(oa_config); 4284 - DRM_DEBUG("Failed to add new OA config\n"); 4233 + drm_dbg(&perf->i915->drm, 4234 + "Failed to add new OA config\n"); 4285 4235 return err; 4286 4236 } 4287 4237 ··· 4306 4254 int ret; 4307 4255 4308 4256 if (!perf->i915) { 4309 - DRM_DEBUG("i915 perf interface not available for this system\n"); 4257 + drm_dbg(&perf->i915->drm, 4258 + "i915 perf interface not available for this system\n"); 4310 4259 return -ENOTSUPP; 4311 4260 } 4312 4261 4313 4262 if (i915_perf_stream_paranoid && !perfmon_capable()) { 4314 - DRM_DEBUG("Insufficient privileges to remove i915 OA config\n"); 4263 + drm_dbg(&perf->i915->drm, 4264 + "Insufficient privileges to remove i915 OA config\n"); 4315 4265 return -EACCES; 4316 4266 } 4317 4267 ··· 4323 4269 4324 4270 oa_config = idr_find(&perf->metrics_idr, *arg); 4325 4271 if (!oa_config) { 4326 - DRM_DEBUG("Failed to remove unknown OA config\n"); 4272 + drm_dbg(&perf->i915->drm, 4273 + "Failed to remove unknown OA config\n"); 4327 4274 ret = -ENOENT; 4328 4275 goto err_unlock; 4329 4276 } ··· 4337 4282 4338 4283 mutex_unlock(&perf->metrics_lock); 4339 4284 4340 - DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); 4285 + drm_dbg(&perf->i915->drm, 4286 + "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); 4341 4287 4342 4288 i915_oa_config_put(oa_config); 4343 4289
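The i915_perf.c changes above convert the file's global `DRM_DEBUG()` calls to `drm_dbg()`, which ties each message to the owning `drm_device`. A userspace-flavoured sketch of why that matters (the names here are ours, not the kernel API): on a multi-GPU system a device-tagged message is attributable, a global one is not.

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical device handle standing in for struct drm_device. */
struct dev {
	const char *name;
};

/*
 * Device-aware debug formatting: unlike a global DRM_DEBUG()-style
 * macro, every message is prefixed with the device it concerns.
 */
static int dev_dbg_msg(char *buf, size_t len, const struct dev *d,
		       const char *msg)
{
	return snprintf(buf, len, "[%s] %s", d->name, msg);
}
```

This is only an analogy for the logging change, not the kernel implementation; in the diff the device is reached via `&perf->i915->drm`.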
+15 -1
drivers/gpu/drm/i915/i915_query.c
··· 498 498 info.region.memory_class = mr->type; 499 499 info.region.memory_instance = mr->instance; 500 500 info.probed_size = mr->total; 501 - info.unallocated_size = mr->avail; 501 + 502 + if (mr->type == INTEL_MEMORY_LOCAL) 503 + info.probed_cpu_visible_size = mr->io_size; 504 + else 505 + info.probed_cpu_visible_size = mr->total; 506 + 507 + if (perfmon_capable()) { 508 + intel_memory_region_avail(mr, 509 + &info.unallocated_size, 510 + &info.unallocated_cpu_visible_size); 511 + } else { 512 + info.unallocated_size = info.probed_size; 513 + info.unallocated_cpu_visible_size = 514 + info.probed_cpu_visible_size; 515 + } 502 516 503 517 if (__copy_to_user(info_ptr, &info, sizeof(info))) 504 518 return -EFAULT;
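The i915_query.c hunk reports `probed_cpu_visible_size` as the region's `io_size` for device-local memory (the CPU-visible BAR window) and the full size otherwise, and only exposes real availability numbers to `perfmon_capable()` callers. A minimal standalone sketch of that selection logic, with hypothetical stand-in types rather than the kernel structs:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical mirror of the region info the query fills in. */
struct region_info {
	uint64_t probed_size;
	uint64_t probed_cpu_visible_size;
	uint64_t unallocated_size;
	uint64_t unallocated_cpu_visible_size;
};

/*
 * Fill the info the way the new query code does: local memory exposes
 * only its CPU-visible (BAR) window as probed_cpu_visible_size, and
 * without perfmon capability the "unallocated" fields simply echo the
 * probed sizes.
 */
static void fill_region_info(struct region_info *info, bool is_local,
			     uint64_t total, uint64_t io_size,
			     bool perfmon_capable,
			     uint64_t avail, uint64_t visible_avail)
{
	info->probed_size = total;
	info->probed_cpu_visible_size = is_local ? io_size : total;

	if (perfmon_capable) {
		info->unallocated_size = avail;
		info->unallocated_cpu_visible_size = visible_avail;
	} else {
		info->unallocated_size = info->probed_size;
		info->unallocated_cpu_visible_size =
			info->probed_cpu_visible_size;
	}
}
```

On a small-BAR part, `io_size < total`, so userspace can detect small BAR purely from `probed_cpu_visible_size != probed_size`.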
+19 -8
drivers/gpu/drm/i915/i915_scatterlist.c
··· 68 68 * drm_mm_node 69 69 * @node: The drm_mm_node. 70 70 * @region_start: An offset to add to the dma addresses of the sg list. 71 + * @page_alignment: Required page alignment for each sg entry. Power of two. 71 72 * 72 73 * Create a struct sg_table, initializing it from a struct drm_mm_node, 73 74 * taking a maximum segment length into account, splitting into segments ··· 78 77 * error code cast to an error pointer on failure. 79 78 */ 80 79 struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, 81 - u64 region_start) 80 + u64 region_start, 81 + u32 page_alignment) 82 82 { 83 - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ 84 - u64 segment_pages = max_segment >> PAGE_SHIFT; 83 + const u32 max_segment = round_down(UINT_MAX, page_alignment); 84 + const u32 segment_pages = max_segment >> PAGE_SHIFT; 85 85 u64 block_size, offset, prev_end; 86 86 struct i915_refct_sgt *rsgt; 87 87 struct sg_table *st; 88 88 struct scatterlist *sg; 89 + 90 + GEM_BUG_ON(!max_segment); 89 91 90 92 rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); 91 93 if (!rsgt) ··· 96 92 97 93 i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); 98 94 st = &rsgt->table; 99 - if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), 95 + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), 100 96 GFP_KERNEL)) { 101 97 i915_refct_sgt_put(rsgt); 102 98 return ERR_PTR(-ENOMEM); ··· 116 112 sg = __sg_next(sg); 117 113 118 114 sg_dma_address(sg) = region_start + offset; 115 + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), 116 + page_alignment)); 119 117 sg_dma_len(sg) = 0; 120 118 sg->length = 0; 121 119 st->nents++; 122 120 } 123 121 124 - len = min(block_size, max_segment - sg->length); 122 + len = min_t(u64, block_size, max_segment - sg->length); 125 123 sg->length += len; 126 124 sg_dma_len(sg) += len; 127 125 ··· 144 138 * i915_buddy_block list 145 139 * @res: The struct i915_ttm_buddy_resource. 
146 140 * @region_start: An offset to add to the dma addresses of the sg list. 141 + * @page_alignment: Required page alignment for each sg entry. Power of two. 147 142 * 148 143 * Create a struct sg_table, initializing it from struct i915_buddy_block list, 149 144 * taking a maximum segment length into account, splitting into segments ··· 154 147 * error code cast to an error pointer on failure. 155 148 */ 156 149 struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, 157 - u64 region_start) 150 + u64 region_start, 151 + u32 page_alignment) 158 152 { 159 153 struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); 160 154 const u64 size = res->num_pages << PAGE_SHIFT; 161 - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); 155 + const u32 max_segment = round_down(UINT_MAX, page_alignment); 162 156 struct drm_buddy *mm = bman_res->mm; 163 157 struct list_head *blocks = &bman_res->blocks; 164 158 struct drm_buddy_block *block; ··· 169 161 resource_size_t prev_end; 170 162 171 163 GEM_BUG_ON(list_empty(blocks)); 164 + GEM_BUG_ON(!max_segment); 172 165 173 166 rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); 174 167 if (!rsgt) ··· 200 191 sg = __sg_next(sg); 201 192 202 193 sg_dma_address(sg) = region_start + offset; 194 + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), 195 + page_alignment)); 203 196 sg_dma_len(sg) = 0; 204 197 sg->length = 0; 205 198 st->nents++; 206 199 } 207 200 208 - len = min(block_size, max_segment - sg->length); 201 + len = min_t(u64, block_size, max_segment - sg->length); 209 202 sg->length += len; 210 203 sg_dma_len(sg) += len; 211 204
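The i915_scatterlist.c changes replace the old ad-hoc `SZ_1G` cap with `round_down(UINT_MAX, page_alignment)`, so every sg entry length both fits the 32-bit sg length field and stays a multiple of the required power-of-two alignment. The computation in isolation (the helper name is ours):

```c
#include <assert.h>
#include <stdint.h>

/*
 * Largest segment length that fits in a 32-bit sg length field while
 * remaining a multiple of a power-of-two page alignment; mirrors
 * round_down(UINT_MAX, page_alignment) in the patch.
 */
static uint32_t max_sg_segment(uint32_t page_alignment)
{
	/* round_down() for a power-of-two alignment. */
	return UINT32_MAX & ~(page_alignment - 1);
}
```

The `GEM_BUG_ON(!max_segment)` added in the diff guards the degenerate case; with any sane power-of-two alignment below 2^32 the result is non-zero.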
+4 -2
drivers/gpu/drm/i915/i915_scatterlist.h
··· 213 213 void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); 214 214 215 215 struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, 216 - u64 region_start); 216 + u64 region_start, 217 + u32 page_alignment); 217 218 218 219 struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, 219 - u64 region_start); 220 + u64 region_start, 221 + u32 page_alignment); 220 222 221 223 #endif
+23 -8
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
··· 104 104 min_page_size, 105 105 &bman_res->blocks, 106 106 bman_res->flags); 107 - mutex_unlock(&bman->lock); 108 107 if (unlikely(err)) 109 108 goto err_free_blocks; 110 109 111 110 if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { 112 111 u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT; 113 112 114 - mutex_lock(&bman->lock); 115 113 drm_buddy_block_trim(mm, 116 114 original_size, 117 115 &bman_res->blocks); 118 - mutex_unlock(&bman->lock); 119 116 } 120 117 121 118 if (lpfn <= bman->visible_size) { ··· 134 137 } 135 138 } 136 139 137 - if (bman_res->used_visible_size) { 138 - mutex_lock(&bman->lock); 140 + if (bman_res->used_visible_size) 139 141 bman->visible_avail -= bman_res->used_visible_size; 140 - mutex_unlock(&bman->lock); 141 - } 142 + 143 + mutex_unlock(&bman->lock); 142 144 143 145 if (place->lpfn - place->fpfn == n_pages) 144 146 bman_res->base.start = place->fpfn; ··· 150 154 return 0; 151 155 152 156 err_free_blocks: 153 - mutex_lock(&bman->lock); 154 157 drm_buddy_free_list(mm, &bman_res->blocks); 155 158 mutex_unlock(&bman->lock); 156 159 err_free_res: ··· 358 363 struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); 359 364 360 365 return bman->visible_size; 366 + } 367 + 368 + /** 369 + * i915_ttm_buddy_man_avail - Query the avail tracking for the manager. 370 + * 371 + * @man: The buddy allocator ttm manager 372 + * @avail: The total available memory in pages for the entire manager. 373 + * @visible_avail: The total available memory in pages for the CPU visible 374 + * portion. Note that this will always give the same value as @avail on 375 + * configurations that don't have a small BAR. 
376 + */ 377 + void i915_ttm_buddy_man_avail(struct ttm_resource_manager *man, 378 + u64 *avail, u64 *visible_avail) 379 + { 380 + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); 381 + 382 + mutex_lock(&bman->lock); 383 + *avail = bman->mm.avail >> PAGE_SHIFT; 384 + *visible_avail = bman->visible_avail; 385 + mutex_unlock(&bman->lock); 361 386 } 362 387 363 388 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+3
drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
··· 61 61 62 62 u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man); 63 63 64 + void i915_ttm_buddy_man_avail(struct ttm_resource_manager *man, 65 + u64 *avail, u64 *avail_visible); 66 + 64 67 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 65 68 void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man, 66 69 u64 size);
For consistency with the definition in i915_ttm_buddy_manager.c (which names the second out-parameter `visible_avail`), the prototype should use the same name:

```c
void i915_ttm_buddy_man_avail(struct ttm_resource_manager *man,
			      u64 *avail, u64 *visible_avail);
```
+21 -16
drivers/gpu/drm/i915/i915_vma.c
··· 310 310 struct i915_address_space *vm; 311 311 struct i915_vm_pt_stash stash; 312 312 struct i915_vma_resource *vma_res; 313 - struct drm_i915_gem_object *pinned; 313 + struct drm_i915_gem_object *obj; 314 314 struct i915_sw_dma_fence_cb cb; 315 315 enum i915_cache_level cache_level; 316 316 unsigned int flags; ··· 321 321 struct i915_vma_work *vw = container_of(work, typeof(*vw), base); 322 322 struct i915_vma_resource *vma_res = vw->vma_res; 323 323 324 + /* 325 + * We are about the bind the object, which must mean we have already 326 + * signaled the work to potentially clear/move the pages underneath. If 327 + * something went wrong at that stage then the object should have 328 + * unknown_state set, in which case we need to skip the bind. 329 + */ 330 + if (i915_gem_object_has_unknown_state(vw->obj)) 331 + return; 332 + 324 333 vma_res->ops->bind_vma(vma_res->vm, &vw->stash, 325 334 vma_res, vw->cache_level, vw->flags); 326 - 327 335 } 328 336 329 337 static void __vma_release(struct dma_fence_work *work) 330 338 { 331 339 struct i915_vma_work *vw = container_of(work, typeof(*vw), base); 332 340 333 - if (vw->pinned) 334 - i915_gem_object_put(vw->pinned); 341 + if (vw->obj) 342 + i915_gem_object_put(vw->obj); 335 343 336 344 i915_vm_free_pt_stash(vw->vm, &vw->stash); 337 345 if (vw->vma_res) ··· 525 517 } 526 518 527 519 work->base.dma.error = 0; /* enable the queue_work() */ 528 - 529 - /* 530 - * If we don't have the refcounted pages list, keep a reference 531 - * on the object to avoid waiting for the async bind to 532 - * complete in the object destruction path. 
533 - */ 534 - if (!work->vma_res->bi.pages_rsgt) 535 - work->pinned = i915_gem_object_get(vma->obj); 520 + work->obj = i915_gem_object_get(vma->obj); 536 521 } else { 537 522 ret = i915_gem_object_wait_moving_fence(vma->obj, true); 538 523 if (ret) { ··· 1646 1645 GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); 1647 1646 } 1648 1647 1649 - static void release_references(struct i915_vma *vma, bool vm_ddestroy) 1648 + static void release_references(struct i915_vma *vma, struct intel_gt *gt, 1649 + bool vm_ddestroy) 1650 1650 { 1651 1651 struct drm_i915_gem_object *obj = vma->obj; 1652 - struct intel_gt *gt = vma->vm->gt; 1653 1652 1654 1653 GEM_BUG_ON(i915_vma_is_active(vma)); 1655 1654 ··· 1704 1703 1705 1704 force_unbind(vma); 1706 1705 list_del_init(&vma->vm_link); 1707 - release_references(vma, false); 1706 + release_references(vma, vma->vm->gt, false); 1708 1707 } 1709 1708 1710 1709 void i915_vma_destroy(struct i915_vma *vma) 1711 1710 { 1711 + struct intel_gt *gt; 1712 1712 bool vm_ddestroy; 1713 1713 1714 1714 mutex_lock(&vma->vm->mutex); ··· 1717 1715 list_del_init(&vma->vm_link); 1718 1716 vm_ddestroy = vma->vm_ddestroy; 1719 1717 vma->vm_ddestroy = false; 1718 + 1719 + /* vma->vm may be freed when releasing vma->vm->mutex. */ 1720 + gt = vma->vm->gt; 1720 1721 mutex_unlock(&vma->vm->mutex); 1721 - release_references(vma, vm_ddestroy); 1722 + release_references(vma, gt, vm_ddestroy); 1722 1723 } 1723 1724 1724 1725 void i915_vma_parked(struct intel_gt *gt)
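The i915_vma.c destruction fix caches `vma->vm->gt` while `vm->mutex` is still held, because once the mutex is dropped the vm may be freed and `vma->vm->gt` becomes a use-after-free. The pattern, reduced to a plain C sketch with hypothetical stand-in types:

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical, heavily reduced stand-ins for the kernel structures. */
struct gt { int id; };
struct vm { struct gt *gt; };
struct vma { struct vm *vm; };

/*
 * Grab the gt pointer while the vm is still guaranteed alive (i.e.
 * "under the mutex"), before any step that can free the vm. Here the
 * vm going away is modelled by clearing vma->vm.
 */
static struct gt *cache_gt_then_drop_vm(struct vma *vma)
{
	struct gt *gt = vma->vm->gt;	/* cache while vm is valid */

	vma->vm = NULL;			/* models the vm being freed on unlock */
	return gt;			/* safe: no further vma->vm->gt deref */
}
```

This is only an illustration of the ordering; the real fix passes the cached `gt` into `release_references()` rather than re-deriving it from `vma->vm`.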
+17
drivers/gpu/drm/i915/intel_device_info.c
··· 73 73 PLATFORM_NAME(XEHPSDV), 74 74 PLATFORM_NAME(DG2), 75 75 PLATFORM_NAME(PONTEVECCHIO), 76 + PLATFORM_NAME(METEORLAKE), 76 77 }; 77 78 #undef PLATFORM_NAME 78 79 ··· 190 189 191 190 static const u16 subplatform_g10_ids[] = { 192 191 INTEL_DG2_G10_IDS(0), 192 + INTEL_ATS_M150_IDS(0), 193 193 }; 194 194 195 195 static const u16 subplatform_g11_ids[] = { 196 196 INTEL_DG2_G11_IDS(0), 197 + INTEL_ATS_M75_IDS(0), 197 198 }; 198 199 199 200 static const u16 subplatform_g12_ids[] = { 200 201 INTEL_DG2_G12_IDS(0), 202 + }; 203 + 204 + static const u16 subplatform_m_ids[] = { 205 + INTEL_MTL_M_IDS(0), 206 + }; 207 + 208 + static const u16 subplatform_p_ids[] = { 209 + INTEL_MTL_P_IDS(0), 201 210 }; 202 211 203 212 static bool find_devid(u16 id, const u16 *p, unsigned int num) ··· 264 253 } else if (find_devid(devid, subplatform_g12_ids, 265 254 ARRAY_SIZE(subplatform_g12_ids))) { 266 255 mask = BIT(INTEL_SUBPLATFORM_G12); 256 + } else if (find_devid(devid, subplatform_m_ids, 257 + ARRAY_SIZE(subplatform_m_ids))) { 258 + mask = BIT(INTEL_SUBPLATFORM_M); 259 + } else if (find_devid(devid, subplatform_p_ids, 260 + ARRAY_SIZE(subplatform_p_ids))) { 261 + mask = BIT(INTEL_SUBPLATFORM_P); 267 262 } 268 263 269 264 GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK);
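The new MTL M/P subplatforms are resolved the same way as the DG2 and ATS-M ones: by scanning small per-subplatform device-ID tables with `find_devid()`. That helper is a plain linear search, sketched here standalone (the IDs in the test are arbitrary example values, not real device IDs):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Mirrors the find_devid() helper in intel_device_info.c: return true
 * if the PCI device ID appears in the given subplatform ID table.
 */
static bool find_devid(uint16_t id, const uint16_t *p, unsigned int num)
{
	unsigned int i;

	for (i = 0; i < num; i++)
		if (p[i] == id)
			return true;

	return false;
}
```

The tables are tiny (a handful of IDs each), so a linear scan at probe time is the idiomatic choice over anything fancier.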
+5
drivers/gpu/drm/i915/intel_device_info.h
··· 89 89 INTEL_XEHPSDV, 90 90 INTEL_DG2, 91 91 INTEL_PONTEVECCHIO, 92 + INTEL_METEORLAKE, 92 93 INTEL_MAX_PLATFORMS 93 94 }; 94 95 ··· 126 125 * bit set 127 126 */ 128 127 #define INTEL_SUBPLATFORM_N 1 128 + 129 + /* MTL */ 130 + #define INTEL_SUBPLATFORM_M 0 131 + #define INTEL_SUBPLATFORM_P 1 129 132 130 133 enum intel_ppgtt_type { 131 134 INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE,
+15 -3
drivers/gpu/drm/i915/intel_memory_region.c
··· 198 198 if (mr->region_private) 199 199 ttm_resource_manager_debug(mr->region_private, printer); 200 200 else 201 - drm_printf(printer, "total:%pa, available:%pa bytes\n", 202 - &mr->total, &mr->avail); 201 + drm_printf(printer, "total:%pa bytes\n", &mr->total); 203 202 } 204 203 205 204 static int intel_memory_region_memtest(struct intel_memory_region *mem, ··· 241 242 mem->min_page_size = min_page_size; 242 243 mem->ops = ops; 243 244 mem->total = size; 244 - mem->avail = mem->total; 245 245 mem->type = type; 246 246 mem->instance = instance; 247 247 ··· 275 277 va_start(ap, fmt); 276 278 vsnprintf(mem->name, sizeof(mem->name), fmt, ap); 277 279 va_end(ap); 280 + } 281 + 282 + void intel_memory_region_avail(struct intel_memory_region *mr, 283 + u64 *avail, u64 *visible_avail) 284 + { 285 + if (mr->type == INTEL_MEMORY_LOCAL) { 286 + i915_ttm_buddy_man_avail(mr->region_private, 287 + avail, visible_avail); 288 + *avail <<= PAGE_SHIFT; 289 + *visible_avail <<= PAGE_SHIFT; 290 + } else { 291 + *avail = mr->total; 292 + *visible_avail = mr->total; 293 + } 278 294 } 279 295 280 296 void intel_memory_region_destroy(struct intel_memory_region *mem)
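The new `intel_memory_region_avail()` converts the buddy manager's page counts to bytes with `<< PAGE_SHIFT` for local memory, and reports the full region size as available for every other region type. A userspace-style sketch of that arithmetic (PAGE_SHIFT assumed to be 12, i.e. 4K pages, for the example):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_PAGE_SHIFT 12	/* assumed 4K pages for this example */

/*
 * Mirror of the new helper's arithmetic: the buddy manager tracks
 * availability in pages, the uAPI reports bytes; non-local regions
 * simply report their total as available.
 */
static void region_avail(bool is_local, uint64_t total_bytes,
			 uint64_t avail_pages, uint64_t visible_avail_pages,
			 uint64_t *avail, uint64_t *visible_avail)
{
	if (is_local) {
		*avail = avail_pages << SKETCH_PAGE_SHIFT;
		*visible_avail = visible_avail_pages << SKETCH_PAGE_SHIFT;
	} else {
		*avail = total_bytes;
		*visible_avail = total_bytes;
	}
}
```

On non-small-BAR hardware the two page counts coming out of `i915_ttm_buddy_man_avail()` are equal, so `avail` and `visible_avail` coincide, matching the doc-comment in the buddy-manager hunk above.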
+3 -1
drivers/gpu/drm/i915/intel_memory_region.h
··· 75 75 resource_size_t io_size; 76 76 resource_size_t min_page_size; 77 77 resource_size_t total; 78 - resource_size_t avail; 79 78 80 79 u16 type; 81 80 u16 instance; ··· 125 126 126 127 void intel_memory_region_debug(struct intel_memory_region *mr, 127 128 struct drm_printer *printer); 129 + 130 + void intel_memory_region_avail(struct intel_memory_region *mr, 131 + u64 *avail, u64 *visible_avail); 128 132 129 133 struct intel_memory_region * 130 134 i915_gem_ttm_system_setup(struct drm_i915_private *i915,
+7 -3
drivers/gpu/drm/i915/intel_region_ttm.c
···
  * Convert an opaque TTM resource manager resource to a refcounted sg_table.
  * @mem: The memory region.
  * @res: The resource manager resource obtained from the TTM resource manager.
+ * @page_alignment: Required page alignment for each sg entry. Power of two.
  *
  * The gem backends typically use sg-tables for operations on the underlying
  * io_memory. So provide a way for the backends to translate the
···
  */
 struct i915_refct_sgt *
 intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
-				  struct ttm_resource *res)
+				  struct ttm_resource *res,
+				  u32 page_alignment)
 {
 	if (mem->is_range_manager) {
 		struct ttm_range_mgr_node *range_node =
 			to_ttm_range_mgr_node(res);

 		return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
-					      mem->region.start);
+					      mem->region.start,
+					      page_alignment);
 	} else {
-		return i915_rsgt_from_buddy_resource(res, mem->region.start);
+		return i915_rsgt_from_buddy_resource(res, mem->region.start,
+						     page_alignment);
 	}
 }
+2 -1
drivers/gpu/drm/i915/intel_region_ttm.h
···

 struct i915_refct_sgt *
 intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
-				  struct ttm_resource *res);
+				  struct ttm_resource *res,
+				  u32 page_alignment);

 void intel_region_ttm_resource_free(struct intel_memory_region *mem,
 				    struct ttm_resource *res);
+1 -1
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
···
 	u64 addr;

 	for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
-	     addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
+	     hole_end > addr && hole_end - addr >= 2 * min_alignment;
 	     addr += step) {
 		err = i915_vma_pin(vma, 0, 0, addr | flags);
 		if (err) {
+19 -2
drivers/gpu/drm/i915/selftests/intel_memory_region.c
···

 static int igt_mock_max_segment(void *arg)
 {
-	const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 	struct intel_memory_region *mem = arg;
 	struct drm_i915_private *i915 = mem->i915;
 	struct i915_ttm_buddy_resource *res;
···
 	struct drm_buddy *mm;
 	struct list_head *blocks;
 	struct scatterlist *sg;
+	I915_RND_STATE(prng);
 	LIST_HEAD(objects);
+	unsigned int max_segment;
+	unsigned int ps;
 	u64 size;
 	int err = 0;
···
 	 */

 	size = SZ_8G;
-	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
+	ps = PAGE_SIZE;
+	if (i915_prandom_u64_state(&prng) & 1)
+		ps = SZ_64K; /* For something like DG2 */
+
+	max_segment = round_down(UINT_MAX, ps);
+
+	mem = mock_region_create(i915, 0, size, ps, 0, 0);
 	if (IS_ERR(mem))
 		return PTR_ERR(mem);
···
 	}

 	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
+		dma_addr_t daddr = sg_dma_address(sg);
+
 		if (sg->length > max_segment) {
 			pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
 			       __func__, sg->length, max_segment);
+			err = -EINVAL;
+			goto out_close;
+		}
+
+		if (!IS_ALIGNED(daddr, ps)) {
+			pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
+			       __func__, &daddr, ps);
 			err = -EINVAL;
 			goto out_close;
 		}
+2 -1
drivers/gpu/drm/i915/selftests/mock_region.c
···
 		return PTR_ERR(obj->mm.res);

 	obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
-							 obj->mm.res);
+							 obj->mm.res,
+							 obj->mm.region->min_page_size);
 	if (IS_ERR(obj->mm.rsgt)) {
 		err = PTR_ERR(obj->mm.rsgt);
 		goto err_free_resource;
+36 -3
include/drm/i915_pciids.h
···
 #define INTEL_DG2_G10_IDS(info) \
 	INTEL_VGA_DEVICE(0x5690, info), \
 	INTEL_VGA_DEVICE(0x5691, info), \
-	INTEL_VGA_DEVICE(0x5692, info)
+	INTEL_VGA_DEVICE(0x5692, info), \
+	INTEL_VGA_DEVICE(0x56A0, info), \
+	INTEL_VGA_DEVICE(0x56A1, info), \
+	INTEL_VGA_DEVICE(0x56A2, info)

 #define INTEL_DG2_G11_IDS(info) \
 	INTEL_VGA_DEVICE(0x5693, info), \
 	INTEL_VGA_DEVICE(0x5694, info), \
 	INTEL_VGA_DEVICE(0x5695, info), \
-	INTEL_VGA_DEVICE(0x56B0, info)
+	INTEL_VGA_DEVICE(0x5698, info), \
+	INTEL_VGA_DEVICE(0x56A5, info), \
+	INTEL_VGA_DEVICE(0x56A6, info), \
+	INTEL_VGA_DEVICE(0x56B0, info), \
+	INTEL_VGA_DEVICE(0x56B1, info)

 #define INTEL_DG2_G12_IDS(info) \
 	INTEL_VGA_DEVICE(0x5696, info), \
 	INTEL_VGA_DEVICE(0x5697, info), \
-	INTEL_VGA_DEVICE(0x56B2, info)
+	INTEL_VGA_DEVICE(0x56A3, info), \
+	INTEL_VGA_DEVICE(0x56A4, info), \
+	INTEL_VGA_DEVICE(0x56B2, info), \
+	INTEL_VGA_DEVICE(0x56B3, info)

 #define INTEL_DG2_IDS(info) \
 	INTEL_DG2_G10_IDS(info), \
 	INTEL_DG2_G11_IDS(info), \
 	INTEL_DG2_G12_IDS(info)
+
+#define INTEL_ATS_M150_IDS(info) \
+	INTEL_VGA_DEVICE(0x56C0, info)
+
+#define INTEL_ATS_M75_IDS(info) \
+	INTEL_VGA_DEVICE(0x56C1, info)
+
+#define INTEL_ATS_M_IDS(info) \
+	INTEL_ATS_M150_IDS(info), \
+	INTEL_ATS_M75_IDS(info)
+
+/* MTL */
+#define INTEL_MTL_M_IDS(info) \
+	INTEL_VGA_DEVICE(0x7D40, info), \
+	INTEL_VGA_DEVICE(0x7D60, info)
+
+#define INTEL_MTL_P_IDS(info) \
+	INTEL_VGA_DEVICE(0x7D45, info), \
+	INTEL_VGA_DEVICE(0x7D55, info), \
+	INTEL_VGA_DEVICE(0x7DD5, info)
+
+#define INTEL_MTL_IDS(info) \
+	INTEL_MTL_M_IDS(info), \
+	INTEL_MTL_P_IDS(info)

 #endif /* _I915_PCIIDS_H */
+289 -92
include/uapi/drm/i915_drm.h
···

 /* Must be kept compact -- no holes and well documented */

-typedef struct drm_i915_getparam {
+/**
+ * struct drm_i915_getparam - Driver parameter query structure.
+ */
+struct drm_i915_getparam {
+	/** @param: Driver parameter to query. */
 	__s32 param;
-	/*
+
+	/**
+	 * @value: Address of memory where queried value should be put.
+	 *
 	 * WARNING: Using pointers instead of fixed-size u64 means we need to write
 	 * compat32 code. Don't repeat this mistake.
 	 */
 	int __user *value;
-} drm_i915_getparam_t;
+};
+
+/**
+ * typedef drm_i915_getparam_t - Driver parameter query structure.
+ * See struct drm_i915_getparam.
+ */
+typedef struct drm_i915_getparam drm_i915_getparam_t;

 /* Ioctl to set kernel params:
  */
···
 	__u64 rsvd2;
 };

+/**
+ * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf
+ * ioctl.
+ *
+ * The request will wait for the input fence to signal before submission.
+ *
+ * The returned output fence will be signaled after the completion of the
+ * request.
+ */
 struct drm_i915_gem_exec_fence {
-	/**
-	 * User's handle for a drm_syncobj to wait on or signal.
-	 */
+	/** @handle: User's handle for a drm_syncobj to wait on or signal. */
 	__u32 handle;

+	/**
+	 * @flags: Supported flags are:
+	 *
+	 * I915_EXEC_FENCE_WAIT:
+	 * Wait for the input fence before request submission.
+	 *
+	 * I915_EXEC_FENCE_SIGNAL:
+	 * Return request completion fence as output.
+	 */
+	__u32 flags;
 #define I915_EXEC_FENCE_WAIT (1<<0)
 #define I915_EXEC_FENCE_SIGNAL (1<<1)
 #define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
-	__u32 flags;
 };

-/*
- * See drm_i915_gem_execbuffer_ext_timeline_fences.
- */
-#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
-
-/*
+/**
+ * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences
+ * for execbuf ioctl.
+ *
  * This structure describes an array of drm_syncobj and associated points for
  * timeline variants of drm_syncobj. It is invalid to append this structure to
  * the execbuf if I915_EXEC_FENCE_ARRAY is set.
  */
 struct drm_i915_gem_execbuffer_ext_timeline_fences {
+#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
+	/** @base: Extension link. See struct i915_user_extension. */
 	struct i915_user_extension base;

 	/**
-	 * Number of element in the handles_ptr & value_ptr arrays.
+	 * @fence_count: Number of elements in the @handles_ptr & @value_ptr
+	 * arrays.
 	 */
 	__u64 fence_count;

 	/**
-	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
-	 * fence_count.
+	 * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence
+	 * of length @fence_count.
 	 */
 	__u64 handles_ptr;

 	/**
-	 * Pointer to an array of u64 values of length fence_count. Values
-	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
-	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 * @values_ptr: Pointer to an array of u64 values of length
+	 * @fence_count.
+	 * Values must be 0 for a binary drm_syncobj. A value of 0 for a
+	 * timeline drm_syncobj is invalid as it turns a drm_syncobj into a
+	 * binary one.
 	 */
 	__u64 values_ptr;
 };

+/**
+ * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2
+ * ioctl.
+ */
 struct drm_i915_gem_execbuffer2 {
-	/**
-	 * List of gem_exec_object2 structs
-	 */
+	/** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */
 	__u64 buffers_ptr;
+
+	/** @buffer_count: Number of elements in @buffers_ptr array */
 	__u32 buffer_count;

-	/** Offset in the batchbuffer to start execution from. */
-	__u32 batch_start_offset;
-	/** Bytes used in batchbuffer from batch_start_offset */
-	__u32 batch_len;
-	__u32 DR1;
-	__u32 DR4;
-	__u32 num_cliprects;
 	/**
-	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * & I915_EXEC_USE_EXTENSIONS are not set.
+	 * @batch_start_offset: Offset in the batchbuffer to start execution
+	 * from.
+	 */
+	__u32 batch_start_offset;
+
+	/**
+	 * @batch_len: Length in bytes of the batch buffer, starting from the
+	 * @batch_start_offset. If 0, length is assumed to be the batch buffer
+	 * object size.
+	 */
+	__u32 batch_len;
+
+	/** @DR1: deprecated */
+	__u32 DR1;
+
+	/** @DR4: deprecated */
+	__u32 DR4;
+
+	/** @num_cliprects: See @cliprects_ptr */
+	__u32 num_cliprects;
+
+	/**
+	 * @cliprects_ptr: Kernel clipping was a DRI1 misfeature.
+	 *
+	 * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or
+	 * I915_EXEC_USE_EXTENSIONS flags are not set.
 	 *
 	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
-	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
-	 * of the array.
+	 * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the
+	 * array.
 	 *
 	 * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
-	 * single struct i915_user_extension and num_cliprects is 0.
+	 * single &i915_user_extension and num_cliprects is 0.
 	 */
 	__u64 cliprects_ptr;
+
+	/** @flags: Execbuf flags */
+	__u64 flags;
 #define I915_EXEC_RING_MASK (0x3f)
 #define I915_EXEC_DEFAULT (0<<0)
 #define I915_EXEC_RENDER (1<<0)
···
 #define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
 #define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
-	__u64 flags;
-	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
-};

 /** Resets the SO write offset registers for transform feedback on gen7. */
 #define I915_EXEC_GEN7_SOL_RESET (1<<8)
···
  * drm_i915_gem_execbuffer_ext enum.
  */
 #define I915_EXEC_USE_EXTENSIONS (1 << 21)
-
 #define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
+
+	/** @rsvd1: Context id */
+	__u64 rsvd1;
+
+	/**
+	 * @rsvd2: in and out sync_file file descriptors.
+	 *
+	 * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the
+	 * lower 32 bits of this field will have the in sync_file fd (input).
+	 *
+	 * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this
+	 * field will have the out sync_file fd (output).
+	 */
+	__u64 rsvd2;
+};

 #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
···
 	__u32 pad;
 };

+/**
+ * struct drm_i915_gem_context_create_ext - Structure for creating contexts.
+ */
 struct drm_i915_gem_context_create_ext {
-	__u32 ctx_id; /* output: id of new context*/
+	/** @ctx_id: Id of the created context (output) */
+	__u32 ctx_id;
+
+	/**
+	 * @flags: Supported flags are:
+	 *
+	 * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS:
+	 *
+	 * Extensions may be appended to this structure and driver must check
+	 * for those. See @extensions.
+	 *
+	 * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE
+	 *
+	 * Created context will have single timeline.
+	 */
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0)
 #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
 	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
+
+	/**
+	 * @extensions: Zero-terminated chain of extensions.
+	 *
+	 * I915_CONTEXT_CREATE_EXT_SETPARAM:
+	 * Context parameter to set or query during context creation.
+	 * See struct drm_i915_gem_context_create_ext_setparam.
+	 *
+	 * I915_CONTEXT_CREATE_EXT_CLONE:
+	 * This extension has been removed. On the off chance someone somewhere
+	 * has attempted to use it, never re-use this extension number.
+	 */
 	__u64 extensions;
+#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
 };

+/**
+ * struct drm_i915_gem_context_param - Context parameter to set or query.
+ */
 struct drm_i915_gem_context_param {
+	/** @ctx_id: Context id */
 	__u32 ctx_id;
+
+	/** @size: Size of the parameter @value */
 	__u32 size;
+
+	/** @param: Parameter to set or query */
 	__u64 param;
 #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
 /* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
···
 #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
 /* Must be kept compact -- no holes and well documented */

+	/** @value: Context parameter value to be set or queried */
 	__u64 value;
 };
···
 	struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__

+/**
+ * struct drm_i915_gem_context_create_ext_setparam - Context parameter
+ * to set or query during context creation.
+ */
 struct drm_i915_gem_context_create_ext_setparam {
-#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+	/** @base: Extension link. See struct i915_user_extension. */
 	struct i915_user_extension base;
+
+	/**
+	 * @param: Context parameter to set or query.
+	 * See struct drm_i915_gem_context_param.
+	 */
 	struct drm_i915_gem_context_param param;
 };
-
-/* This API has been removed. On the off chance someone somewhere has
- * attempted to use it, never re-use this extension number.
- */
-#define I915_CONTEXT_CREATE_EXT_CLONE 1

 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
 };

-/*
+/**
+ * struct drm_i915_gem_vm_control - Structure to create or destroy VM.
+ *
  * DRM_I915_GEM_VM_CREATE -
  *
  * Create a new virtual memory address space (ppGTT) for use within a context
···
 * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
 * returned in the outparam @id.
 *
- * No flags are defined, with all bits reserved and must be zero.
- *
 * An extension chain may be provided, starting with @extensions, and terminated
 * by the @next_extension being 0. Currently, no extensions are defined.
 *
 * DRM_I915_GEM_VM_DESTROY -
 *
- * Destroys a previously created VM id, specified in @id.
+ * Destroys a previously created VM id, specified in @vm_id.
 *
 * No extensions or flags are allowed currently, and so must be zero.
 */
 struct drm_i915_gem_vm_control {
+	/** @extensions: Zero-terminated chain of extensions. */
 	__u64 extensions;
+
+	/** @flags: reserved for future usage, currently MBZ */
 	__u32 flags;
+
+	/** @vm_id: Id of the VM created or to be destroyed */
 	__u32 vm_id;
 };
···
 * struct drm_i915_memory_region_info - Describes one region as known to the
 * driver.
 *
- * Note that we reserve some stuff here for potential future work. As an example
- * we might want expose the capabilities for a given region, which could include
- * things like if the region is CPU mappable/accessible, what are the supported
- * mapping types etc.
- *
- * Note that to extend struct drm_i915_memory_region_info and struct
- * drm_i915_query_memory_regions in the future the plan is to do the following:
- *
- * .. code-block:: C
- *
- *	struct drm_i915_memory_region_info {
- *		struct drm_i915_gem_memory_class_instance region;
- *		union {
- *			__u32 rsvd0;
- *			__u32 new_thing1;
- *		};
- *		...
- *		union {
- *			__u64 rsvd1[8];
- *			struct {
- *				__u64 new_thing2;
- *				__u64 new_thing3;
- *				...
- *			};
- *		};
- *	};
- *
- * With this things should remain source compatible between versions for
- * userspace, even as we add new fields.
- *
 * Note this is using both struct drm_i915_query_item and struct drm_i915_query.
 * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
 * at &drm_i915_query_item.query_id.
···
 	/** @rsvd0: MBZ */
 	__u32 rsvd0;

-	/** @probed_size: Memory probed by the driver (-1 = unknown) */
+	/**
+	 * @probed_size: Memory probed by the driver
+	 *
+	 * Note that it should not be possible to ever encounter a zero value
+	 * here, also note that no current region type will ever return -1 here.
+	 * Although for future region types, this might be a possibility. The
+	 * same applies to the other size fields.
+	 */
 	__u64 probed_size;

-	/** @unallocated_size: Estimate of memory remaining (-1 = unknown) */
+	/**
+	 * @unallocated_size: Estimate of memory remaining
+	 *
+	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.
+	 * Without this (or if this is an older kernel) the value here will
+	 * always equal the @probed_size. Note this is only currently tracked
+	 * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here
+	 * will always equal the @probed_size).
+	 */
 	__u64 unallocated_size;

-	/** @rsvd1: MBZ */
-	__u64 rsvd1[8];
+	union {
+		/** @rsvd1: MBZ */
+		__u64 rsvd1[8];
+		struct {
+			/**
+			 * @probed_cpu_visible_size: Memory probed by the driver
+			 * that is CPU accessible.
+			 *
+			 * This will always be <= @probed_size, and the
+			 * remainder (if there is any) will not be CPU
+			 * accessible.
+			 *
+			 * On systems without small BAR, the @probed_size will
+			 * always equal the @probed_cpu_visible_size, since all
+			 * of it will be CPU accessible.
+			 *
+			 * Note this is only tracked for
+			 * I915_MEMORY_CLASS_DEVICE regions (for other types the
+			 * value here will always equal the @probed_size).
+			 *
+			 * Note that if the value returned here is zero, then
+			 * this must be an old kernel which lacks the relevant
+			 * small-bar uAPI support (including
+			 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on
+			 * such systems we should never actually end up with a
+			 * small BAR configuration, assuming we are able to load
+			 * the kernel module. Hence it should be safe to treat
+			 * this the same as when @probed_cpu_visible_size ==
+			 * @probed_size.
+			 */
+			__u64 probed_cpu_visible_size;
+
+			/**
+			 * @unallocated_cpu_visible_size: Estimate of CPU
+			 * visible memory remaining.
+			 *
+			 * Note this is only tracked for
+			 * I915_MEMORY_CLASS_DEVICE regions (for other types the
+			 * value here will always equal the
+			 * @probed_cpu_visible_size).
+			 *
+			 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+			 * accounting. Without this the value here will always
+			 * equal the @probed_cpu_visible_size. Note this is only
+			 * currently tracked for I915_MEMORY_CLASS_DEVICE
+			 * regions (for other types the value here will also
+			 * always equal the @probed_cpu_visible_size).
+			 *
+			 * If this is an older kernel the value here will be
+			 * zero, see also @probed_cpu_visible_size.
+			 */
+			__u64 unallocated_cpu_visible_size;
+		};
+	};
 };

 /**
···
 * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
 * extension support using struct i915_user_extension.
 *
- * Note that in the future we want to have our buffer flags here, at least for
- * the stuff that is immutable. Previously we would have two ioctls, one to
- * create the object with gem_create, and another to apply various parameters,
- * however this creates some ambiguity for the params which are considered
- * immutable. Also in general we're phasing out the various SET/GET ioctls.
+ * Note that new buffer flags should be added here, at least for the stuff that
+ * is immutable. Previously we would have two ioctls, one to create the object
+ * with gem_create, and another to apply various parameters, however this
+ * creates some ambiguity for the params which are considered immutable. Also in
+ * general we're phasing out the various SET/GET ioctls.
 */
 struct drm_i915_gem_create_ext {
 	/**
 	 * @size: Requested size for the object.
 	 *
 	 * The (page-aligned) allocated size for the object will be returned.
-	 *
 	 *
 	 * DG2 64K min page size implications:
 	 *
···
 	 *
 	 * Note that the returned size here will always reflect any required
 	 * rounding up done by the kernel, i.e 4K will now become 64K on devices
-	 * such as DG2.
+	 * such as DG2. The kernel will always select the largest minimum
+	 * page-size for the set of possible placements as the value to use when
+	 * rounding up the @size.
 	 *
 	 * Special DG2 GTT address alignment requirement:
 	 *
···
 	 * is deemed to be a good compromise.
 	 */
 	__u64 size;
+
 	/**
 	 * @handle: Returned handle for the object.
 	 *
 	 * Object handles are nonzero.
 	 */
 	__u32 handle;
-	/** @flags: MBZ */
+
+	/**
+	 * @flags: Optional flags.
+	 *
+	 * Supported values:
+	 *
+	 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+	 * the object will need to be accessed via the CPU.
+	 *
+	 * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
+	 * strictly required on configurations where some subset of the device
+	 * memory is directly visible/mappable through the CPU (which we also
+	 * call small BAR), like on some DG2+ systems. Note that this is quite
+	 * undesirable, but due to various factors like the client CPU, BIOS etc
+	 * it's something we can expect to see in the wild. See
+	 * &drm_i915_memory_region_info.probed_cpu_visible_size for how to
+	 * determine if this system applies.
+	 *
+	 * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to
+	 * ensure the kernel can always spill the allocation to system memory,
+	 * if the object can't be allocated in the mappable part of
+	 * I915_MEMORY_CLASS_DEVICE.
+	 *
+	 * Also note that since the kernel only supports flat-CCS on objects
+	 * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore
+	 * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
+	 * flat-CCS.
+	 *
+	 * Without this hint, the kernel will assume that non-mappable
+	 * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+	 * kernel can still migrate the object to the mappable part, as a last
+	 * resort, if userspace ever CPU faults this object, but this might be
+	 * expensive, and so ideally should be avoided.
+	 *
+	 * On older kernels which lack the relevant small-bar uAPI support (see
+	 * also &drm_i915_memory_region_info.probed_cpu_visible_size),
+	 * usage of the flag will result in an error, but it should NEVER be
+	 * possible to end up with a small BAR configuration, assuming we can
+	 * also successfully load the i915 kernel module. In such cases the
+	 * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as
+	 * such there are zero restrictions on where the object can be placed.
+	 */
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
 	__u32 flags;
+
 	/**
 	 * @extensions: The chain of extensions to apply to this object.
 	 *