Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Make the physical object coherent with GTT

Currently objects for which the hardware needs a contiguous physical
address are allocated a shadow backing storage to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object via either a GTT mmapping of it or by rendering
into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This allows the physical constraint of the GEM object to be transparent
to userspace and allow it to efficiently render into or update them via
the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to
render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

authored by

Chris Wilson and committed by
Daniel Vetter
6a2c4232 132f3f17

+149 -66
+3
drivers/gpu/drm/i915/i915_dma.c
··· 1027 1027 case I915_PARAM_CMD_PARSER_VERSION: 1028 1028 value = i915_cmd_parser_get_version(); 1029 1029 break; 1030 + case I915_PARAM_HAS_COHERENT_PHYS_GTT: 1031 + value = 1; 1032 + break; 1030 1033 default: 1031 1034 DRM_DEBUG("Unknown parameter %d\n", param->param); 1032 1035 return -EINVAL;
+3 -3
drivers/gpu/drm/i915/i915_drv.h
··· 1957 1957 unsigned long user_pin_count; 1958 1958 struct drm_file *pin_filp; 1959 1959 1960 - /** for phy allocated objects */ 1961 - struct drm_dma_handle *phys_handle; 1962 - 1963 1960 union { 1961 + /** for phy allocated objects */ 1962 + struct drm_dma_handle *phys_handle; 1963 + 1964 1964 struct i915_gem_userptr { 1965 1965 uintptr_t ptr; 1966 1966 unsigned read_only :1;
+142 -63
drivers/gpu/drm/i915/i915_gem.c
··· 208 208 return 0; 209 209 } 210 210 211 - static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) 211 + static int 212 + i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 212 213 { 213 - drm_dma_handle_t *phys = obj->phys_handle; 214 + struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 215 + char *vaddr = obj->phys_handle->vaddr; 216 + struct sg_table *st; 217 + struct scatterlist *sg; 218 + int i; 214 219 215 - if (!phys) 216 - return; 220 + if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 221 + return -EINVAL; 217 222 218 - if (obj->madv == I915_MADV_WILLNEED) { 223 + for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 224 + struct page *page; 225 + char *src; 226 + 227 + page = shmem_read_mapping_page(mapping, i); 228 + if (IS_ERR(page)) 229 + return PTR_ERR(page); 230 + 231 + src = kmap_atomic(page); 232 + memcpy(vaddr, src, PAGE_SIZE); 233 + drm_clflush_virt_range(vaddr, PAGE_SIZE); 234 + kunmap_atomic(src); 235 + 236 + page_cache_release(page); 237 + vaddr += PAGE_SIZE; 238 + } 239 + 240 + i915_gem_chipset_flush(obj->base.dev); 241 + 242 + st = kmalloc(sizeof(*st), GFP_KERNEL); 243 + if (st == NULL) 244 + return -ENOMEM; 245 + 246 + if (sg_alloc_table(st, 1, GFP_KERNEL)) { 247 + kfree(st); 248 + return -ENOMEM; 249 + } 250 + 251 + sg = st->sgl; 252 + sg->offset = 0; 253 + sg->length = obj->base.size; 254 + 255 + sg_dma_address(sg) = obj->phys_handle->busaddr; 256 + sg_dma_len(sg) = obj->base.size; 257 + 258 + obj->pages = st; 259 + obj->has_dma_mapping = true; 260 + return 0; 261 + } 262 + 263 + static void 264 + i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 265 + { 266 + int ret; 267 + 268 + BUG_ON(obj->madv == __I915_MADV_PURGED); 269 + 270 + ret = i915_gem_object_set_to_cpu_domain(obj, true); 271 + if (ret) { 272 + /* In the event of a disaster, abandon all caches and 273 + * hope for the best. 
274 + */ 275 + WARN_ON(ret != -EIO); 276 + obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 277 + } 278 + 279 + if (obj->madv == I915_MADV_DONTNEED) 280 + obj->dirty = 0; 281 + 282 + if (obj->dirty) { 219 283 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 220 - char *vaddr = phys->vaddr; 284 + char *vaddr = obj->phys_handle->vaddr; 221 285 int i; 222 286 223 287 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 224 - struct page *page = shmem_read_mapping_page(mapping, i); 225 - if (!IS_ERR(page)) { 226 - char *dst = kmap_atomic(page); 227 - memcpy(dst, vaddr, PAGE_SIZE); 228 - drm_clflush_virt_range(dst, PAGE_SIZE); 229 - kunmap_atomic(dst); 288 + struct page *page; 289 + char *dst; 230 290 231 - set_page_dirty(page); 291 + page = shmem_read_mapping_page(mapping, i); 292 + if (IS_ERR(page)) 293 + continue; 294 + 295 + dst = kmap_atomic(page); 296 + drm_clflush_virt_range(vaddr, PAGE_SIZE); 297 + memcpy(dst, vaddr, PAGE_SIZE); 298 + kunmap_atomic(dst); 299 + 300 + set_page_dirty(page); 301 + if (obj->madv == I915_MADV_WILLNEED) 232 302 mark_page_accessed(page); 233 - page_cache_release(page); 234 - } 303 + page_cache_release(page); 235 304 vaddr += PAGE_SIZE; 236 305 } 237 - i915_gem_chipset_flush(obj->base.dev); 306 + obj->dirty = 0; 238 307 } 239 308 240 - #ifdef CONFIG_X86 241 - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 242 - #endif 243 - drm_pci_free(obj->base.dev, phys); 244 - obj->phys_handle = NULL; 309 + sg_free_table(obj->pages); 310 + kfree(obj->pages); 311 + 312 + obj->has_dma_mapping = false; 313 + } 314 + 315 + static void 316 + i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 317 + { 318 + drm_pci_free(obj->base.dev, obj->phys_handle); 319 + } 320 + 321 + static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 322 + .get_pages = i915_gem_object_get_pages_phys, 323 + .put_pages = i915_gem_object_put_pages_phys, 324 + .release = i915_gem_object_release_phys, 
325 + }; 326 + 327 + static int 328 + drop_pages(struct drm_i915_gem_object *obj) 329 + { 330 + struct i915_vma *vma, *next; 331 + int ret; 332 + 333 + drm_gem_object_reference(&obj->base); 334 + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 335 + if (i915_vma_unbind(vma)) 336 + break; 337 + 338 + ret = i915_gem_object_put_pages(obj); 339 + drm_gem_object_unreference(&obj->base); 340 + 341 + return ret; 245 342 } 246 343 247 344 int ··· 346 249 int align) 347 250 { 348 251 drm_dma_handle_t *phys; 349 - struct address_space *mapping; 350 - char *vaddr; 351 - int i; 252 + int ret; 352 253 353 254 if (obj->phys_handle) { 354 255 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) ··· 361 266 if (obj->base.filp == NULL) 362 267 return -EINVAL; 363 268 269 + ret = drop_pages(obj); 270 + if (ret) 271 + return ret; 272 + 364 273 /* create a new object */ 365 274 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 366 275 if (!phys) 367 276 return -ENOMEM; 368 277 369 - vaddr = phys->vaddr; 370 - #ifdef CONFIG_X86 371 - set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); 372 - #endif 373 - mapping = file_inode(obj->base.filp)->i_mapping; 374 - for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 375 - struct page *page; 376 - char *src; 377 - 378 - page = shmem_read_mapping_page(mapping, i); 379 - if (IS_ERR(page)) { 380 - #ifdef CONFIG_X86 381 - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 382 - #endif 383 - drm_pci_free(obj->base.dev, phys); 384 - return PTR_ERR(page); 385 - } 386 - 387 - src = kmap_atomic(page); 388 - memcpy(vaddr, src, PAGE_SIZE); 389 - kunmap_atomic(src); 390 - 391 - mark_page_accessed(page); 392 - page_cache_release(page); 393 - 394 - vaddr += PAGE_SIZE; 395 - } 396 - 397 278 obj->phys_handle = phys; 398 - return 0; 279 + obj->ops = &i915_gem_phys_ops; 280 + 281 + return i915_gem_object_get_pages(obj); 399 282 } 400 283 401 284 static int ··· 384 311 struct drm_device *dev = obj->base.dev; 385 
312 void *vaddr = obj->phys_handle->vaddr + args->offset; 386 313 char __user *user_data = to_user_ptr(args->data_ptr); 314 + int ret; 315 + 316 + /* We manually control the domain here and pretend that it 317 + * remains coherent i.e. in the GTT domain, like shmem_pwrite. 318 + */ 319 + ret = i915_gem_object_wait_rendering(obj, false); 320 + if (ret) 321 + return ret; 387 322 388 323 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 389 324 unsigned long unwritten; ··· 407 326 return -EFAULT; 408 327 } 409 328 329 + drm_clflush_virt_range(vaddr, args->size); 410 330 i915_gem_chipset_flush(dev); 411 331 return 0; 412 332 } ··· 1128 1046 * pread/pwrite currently are reading and writing from the CPU 1129 1047 * perspective, requiring manual detiling by the client. 1130 1048 */ 1131 - if (obj->phys_handle) { 1132 - ret = i915_gem_phys_pwrite(obj, args, file); 1133 - goto out; 1134 - } 1135 - 1136 1049 if (obj->tiling_mode == I915_TILING_NONE && 1137 1050 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1138 1051 cpu_write_needs_clflush(obj)) { ··· 1137 1060 * textures). Fallback to the shmem path in that case. */ 1138 1061 } 1139 1062 1140 - if (ret == -EFAULT || ret == -ENOSPC) 1141 - ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1063 + if (ret == -EFAULT || ret == -ENOSPC) { 1064 + if (obj->phys_handle) 1065 + ret = i915_gem_phys_pwrite(obj, args, file); 1066 + else 1067 + ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1068 + } 1142 1069 1143 1070 out: 1144 1071 drm_gem_object_unreference(&obj->base); ··· 3590 3509 * Stolen memory is always coherent with the GPU as it is explicitly 3591 3510 * marked as wc by the system, or the system is cache-coherent. 
3592 3511 */ 3593 - if (obj->stolen) 3512 + if (obj->stolen || obj->phys_handle) 3594 3513 return false; 3595 3514 3596 3515 /* If the GPU is snooping the contents of the CPU cache, ··· 4551 4470 dev_priv->mm.interruptible = was_interruptible; 4552 4471 } 4553 4472 } 4554 - 4555 - i915_gem_object_detach_phys(obj); 4556 4473 4557 4474 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4558 4475 * before progressing. */
+1
include/uapi/drm/i915_drm.h
··· 340 340 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 341 341 #define I915_PARAM_HAS_WT 27 342 342 #define I915_PARAM_CMD_PARSER_VERSION 28 343 + #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 343 344 344 345 typedef struct drm_i915_getparam { 345 346 int param;