Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm

Pull drm coherent memory support for vmwgfx from Dave Airlie:
"This is a separate pull for the mm pagewalking + drm/vmwgfx work
Thomas did and you were involved in, I've left it separate in case you
don't feel as comfortable with it as the other stuff.

It has mm acks/r-b in the right places from what I can see"

* tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm:
drm/vmwgfx: Add surface dirty-tracking callbacks
drm/vmwgfx: Implement an infrastructure for read-coherent resources
drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
drm/vmwgfx: Implement an infrastructure for write-coherent resources
mm: Add write-protect and clean utilities for address space ranges
mm: Add a walk_page_mapping() function to the pagewalk code
mm: pagewalk: Take the pagetable lock in walk_pte_range()
mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock()
drm/ttm: Convert vm callbacks to helpers
drm/ttm: Remove explicit typecasts of vm_private_data

+1996 -123
+107 -67
drivers/gpu/drm/ttm/ttm_bo_vm.c
··· 42 42 #include <linux/uaccess.h> 43 43 #include <linux/mem_encrypt.h> 44 44 45 - #define TTM_BO_VM_NUM_PREFAULT 16 46 - 47 45 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, 48 46 struct vm_fault *vmf) 49 47 { ··· 104 106 + page_offset; 105 107 } 106 108 107 - static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) 109 + /** 110 + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback 111 + * @bo: The buffer object 112 + * @vmf: The fault structure handed to the callback 113 + * 114 + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped 115 + * during long waits, and after the wait the callback will be restarted. This 116 + * is to allow other threads using the same virtual memory space concurrent 117 + * access to map(), unmap() completely unrelated buffer objects. TTM buffer 118 + * object reservations sometimes wait for GPU and should therefore be 119 + * considered long waits. This function reserves the buffer object interruptibly 120 + * taking this into account. Starvation is avoided by the vm system not 121 + * allowing too many repeated restarts. 122 + * This function is intended to be used in customized fault() and _mkwrite() 123 + * handlers. 124 + * 125 + * Return: 126 + * 0 on success and the bo was reserved. 127 + * VM_FAULT_RETRY if blocking wait. 128 + * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. 
129 + */ 130 + vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 131 + struct vm_fault *vmf) 108 132 { 109 - struct vm_area_struct *vma = vmf->vma; 110 - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 111 - vma->vm_private_data; 112 - struct ttm_bo_device *bdev = bo->bdev; 113 - unsigned long page_offset; 114 - unsigned long page_last; 115 - unsigned long pfn; 116 - struct ttm_tt *ttm = NULL; 117 - struct page *page; 118 - int err; 119 - int i; 120 - vm_fault_t ret = VM_FAULT_NOPAGE; 121 - unsigned long address = vmf->address; 122 - struct ttm_mem_type_manager *man = 123 - &bdev->man[bo->mem.mem_type]; 124 - struct vm_area_struct cvma; 125 - 126 133 /* 127 134 * Work around locking order reversal in fault / nopfn 128 135 * between mmap_sem and bo_reserve: Perform a trylock operation ··· 154 151 return VM_FAULT_NOPAGE; 155 152 } 156 153 154 + return 0; 155 + } 156 + EXPORT_SYMBOL(ttm_bo_vm_reserve); 157 + 158 + /** 159 + * ttm_bo_vm_fault_reserved - TTM fault helper 160 + * @vmf: The struct vm_fault given as argument to the fault callback 161 + * @prot: The page protection to be used for this memory area. 162 + * @num_prefault: Maximum number of prefault pages. The caller may want to 163 + * specify this based on madvice settings and the size of the GPU object 164 + * backed by the memory. 165 + * 166 + * This function inserts one or more page table entries pointing to the 167 + * memory backing the buffer object, and then returns a return code 168 + * instructing the caller to retry the page access. 
169 + * 170 + * Return: 171 + * VM_FAULT_NOPAGE on success or pending signal 172 + * VM_FAULT_SIGBUS on unspecified error 173 + * VM_FAULT_OOM on out-of-memory 174 + * VM_FAULT_RETRY if retryable wait 175 + */ 176 + vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, 177 + pgprot_t prot, 178 + pgoff_t num_prefault) 179 + { 180 + struct vm_area_struct *vma = vmf->vma; 181 + struct vm_area_struct cvma = *vma; 182 + struct ttm_buffer_object *bo = vma->vm_private_data; 183 + struct ttm_bo_device *bdev = bo->bdev; 184 + unsigned long page_offset; 185 + unsigned long page_last; 186 + unsigned long pfn; 187 + struct ttm_tt *ttm = NULL; 188 + struct page *page; 189 + int err; 190 + pgoff_t i; 191 + vm_fault_t ret = VM_FAULT_NOPAGE; 192 + unsigned long address = vmf->address; 193 + struct ttm_mem_type_manager *man = 194 + &bdev->man[bo->mem.mem_type]; 195 + 157 196 /* 158 197 * Refuse to fault imported pages. This should be handled 159 198 * (if at all) by redirecting mmap to the exporter. 160 199 */ 161 - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { 162 - ret = VM_FAULT_SIGBUS; 163 - goto out_unlock; 164 - } 200 + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) 201 + return VM_FAULT_SIGBUS; 165 202 166 203 if (bdev->driver->fault_reserve_notify) { 167 204 struct dma_fence *moving = dma_fence_get(bo->moving); ··· 212 169 break; 213 170 case -EBUSY: 214 171 case -ERESTARTSYS: 215 - ret = VM_FAULT_NOPAGE; 216 - goto out_unlock; 172 + return VM_FAULT_NOPAGE; 217 173 default: 218 - ret = VM_FAULT_SIGBUS; 219 - goto out_unlock; 174 + return VM_FAULT_SIGBUS; 220 175 } 221 176 222 177 if (bo->moving != moving) { ··· 230 189 * move. 231 190 */ 232 191 ret = ttm_bo_vm_fault_idle(bo, vmf); 233 - if (unlikely(ret != 0)) { 234 - if (ret == VM_FAULT_RETRY && 235 - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { 236 - /* The BO has already been unreserved. 
*/ 237 - return ret; 238 - } 239 - 240 - goto out_unlock; 241 - } 192 + if (unlikely(ret != 0)) 193 + return ret; 242 194 243 195 err = ttm_mem_io_lock(man, true); 244 - if (unlikely(err != 0)) { 245 - ret = VM_FAULT_NOPAGE; 246 - goto out_unlock; 247 - } 196 + if (unlikely(err != 0)) 197 + return VM_FAULT_NOPAGE; 248 198 err = ttm_mem_io_reserve_vm(bo); 249 199 if (unlikely(err != 0)) { 250 200 ret = VM_FAULT_SIGBUS; ··· 252 220 goto out_io_unlock; 253 221 } 254 222 255 - /* 256 - * Make a local vma copy to modify the page_prot member 257 - * and vm_flags if necessary. The vma parameter is protected 258 - * by mmap_sem in write mode. 259 - */ 260 - cvma = *vma; 261 - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags); 262 - 263 - if (bo->mem.bus.is_iomem) { 264 - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, 265 - cvma.vm_page_prot); 266 - } else { 223 + cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot); 224 + if (!bo->mem.bus.is_iomem) { 267 225 struct ttm_operation_ctx ctx = { 268 226 .interruptible = false, 269 227 .no_wait_gpu = false, ··· 262 240 }; 263 241 264 242 ttm = bo->ttm; 265 - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, 266 - cvma.vm_page_prot); 267 - 268 - /* Allocate all page at once, most common usage */ 269 - if (ttm_tt_populate(ttm, &ctx)) { 243 + if (ttm_tt_populate(bo->ttm, &ctx)) { 270 244 ret = VM_FAULT_OOM; 271 245 goto out_io_unlock; 272 246 } 247 + } else { 248 + /* Iomem should not be marked encrypted */ 249 + cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); 273 250 } 274 251 275 252 /* 276 253 * Speculatively prefault a number of pages. Only error on 277 254 * first page. 
278 255 */ 279 - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { 256 + for (i = 0; i < num_prefault; ++i) { 280 257 if (bo->mem.bus.is_iomem) { 281 - /* Iomem should not be marked encrypted */ 282 - cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); 283 258 pfn = ttm_bo_io_mem_pfn(bo, page_offset); 284 259 } else { 285 260 page = ttm->pages[page_offset]; ··· 312 293 ret = VM_FAULT_NOPAGE; 313 294 out_io_unlock: 314 295 ttm_mem_io_unlock(man); 315 - out_unlock: 296 + return ret; 297 + } 298 + EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); 299 + 300 + static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) 301 + { 302 + struct vm_area_struct *vma = vmf->vma; 303 + pgprot_t prot; 304 + struct ttm_buffer_object *bo = vma->vm_private_data; 305 + vm_fault_t ret; 306 + 307 + ret = ttm_bo_vm_reserve(bo, vmf); 308 + if (ret) 309 + return ret; 310 + 311 + prot = vm_get_page_prot(vma->vm_flags); 312 + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); 313 + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 314 + return ret; 315 + 316 316 dma_resv_unlock(bo->base.resv); 317 + 317 318 return ret; 318 319 } 319 320 320 - static void ttm_bo_vm_open(struct vm_area_struct *vma) 321 + void ttm_bo_vm_open(struct vm_area_struct *vma) 321 322 { 322 - struct ttm_buffer_object *bo = 323 - (struct ttm_buffer_object *)vma->vm_private_data; 323 + struct ttm_buffer_object *bo = vma->vm_private_data; 324 324 325 325 WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); 326 326 327 327 ttm_bo_get(bo); 328 328 } 329 + EXPORT_SYMBOL(ttm_bo_vm_open); 329 330 330 - static void ttm_bo_vm_close(struct vm_area_struct *vma) 331 + void ttm_bo_vm_close(struct vm_area_struct *vma) 331 332 { 332 - struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data; 333 + struct ttm_buffer_object *bo = vma->vm_private_data; 333 334 334 335 ttm_bo_put(bo); 335 336 vma->vm_private_data = NULL; 336 337 } 338 + EXPORT_SYMBOL(ttm_bo_vm_close); 337 339 338 340 
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, 339 341 unsigned long offset,
+1
drivers/gpu/drm/vmwgfx/Kconfig
··· 8 8 select FB_CFB_IMAGEBLIT 9 9 select DRM_TTM 10 10 select FB 11 + select MAPPING_DIRTY_HELPERS 11 12 # Only needed for the transitional use of drm_crtc_init - can be removed 12 13 # again once vmwgfx sets up the primary plane itself. 13 14 select DRM_KMS_HELPER
+1 -1
drivers/gpu/drm/vmwgfx/Makefile
··· 8 8 vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \ 9 9 vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ 10 10 vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \ 11 - vmwgfx_validation.o \ 11 + vmwgfx_validation.o vmwgfx_page_dirty.o \ 12 12 ttm_object.o ttm_lock.o 13 13 14 14 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
+232 -1
drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
··· 1280 1280 return offset; 1281 1281 } 1282 1282 1283 - 1284 1283 static inline u32 1285 1284 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, 1286 1285 surf_size_struct baseLevelSize, ··· 1372 1373 return true; 1373 1374 } 1374 1375 return svga3dsurface_is_dx_screen_target_format(format); 1376 + } 1377 + 1378 + /** 1379 + * struct svga3dsurface_mip - Mipmap level information 1380 + * @bytes: Bytes required in the backing store of this mipmap level. 1381 + * @img_stride: Byte stride per image. 1382 + * @row_stride: Byte stride per block row. 1383 + * @size: The size of the mipmap. 1384 + */ 1385 + struct svga3dsurface_mip { 1386 + size_t bytes; 1387 + size_t img_stride; 1388 + size_t row_stride; 1389 + struct drm_vmw_size size; 1390 + 1391 + }; 1392 + 1393 + /** 1394 + * struct svga3dsurface_cache - Cached surface information 1395 + * @desc: Pointer to the surface descriptor 1396 + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. 1397 + * @mip_chain_bytes: Bytes required in the backing store for the whole chain 1398 + * of mip levels. 1399 + * @sheet_bytes: Bytes required in the backing store for a sheet 1400 + * representing a single sample. 1401 + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in 1402 + * a chain. 1403 + * @num_layers: Number of slices in an array texture or number of faces in 1404 + * a cubemap texture. 1405 + */ 1406 + struct svga3dsurface_cache { 1407 + const struct svga3d_surface_desc *desc; 1408 + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; 1409 + size_t mip_chain_bytes; 1410 + size_t sheet_bytes; 1411 + u32 num_mip_levels; 1412 + u32 num_layers; 1413 + }; 1414 + 1415 + /** 1416 + * struct svga3dsurface_loc - Surface location 1417 + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + 1418 + * mip_level. 1419 + * @x: X coordinate. 1420 + * @y: Y coordinate. 1421 + * @z: Z coordinate. 
1422 + */ 1423 + struct svga3dsurface_loc { 1424 + u32 sub_resource; 1425 + u32 x, y, z; 1426 + }; 1427 + 1428 + /** 1429 + * svga3dsurface_subres - Compute the subresource from layer and mipmap. 1430 + * @cache: Surface layout data. 1431 + * @mip_level: The mipmap level. 1432 + * @layer: The surface layer (face or array slice). 1433 + * 1434 + * Return: The subresource. 1435 + */ 1436 + static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, 1437 + u32 mip_level, u32 layer) 1438 + { 1439 + return cache->num_mip_levels * layer + mip_level; 1440 + } 1441 + 1442 + /** 1443 + * svga3dsurface_setup_cache - Build a surface cache entry 1444 + * @size: The surface base level dimensions. 1445 + * @format: The surface format. 1446 + * @num_mip_levels: Number of mipmap levels. 1447 + * @num_layers: Number of layers. 1448 + * @cache: Pointer to a struct svga3dsurface_cache object to be filled in. 1449 + * 1450 + * Return: Zero on success, -EINVAL on invalid surface layout. 1451 + */ 1452 + static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, 1453 + SVGA3dSurfaceFormat format, 1454 + u32 num_mip_levels, 1455 + u32 num_layers, 1456 + u32 num_samples, 1457 + struct svga3dsurface_cache *cache) 1458 + { 1459 + const struct svga3d_surface_desc *desc; 1460 + u32 i; 1461 + 1462 + memset(cache, 0, sizeof(*cache)); 1463 + cache->desc = desc = svga3dsurface_get_desc(format); 1464 + cache->num_mip_levels = num_mip_levels; 1465 + cache->num_layers = num_layers; 1466 + for (i = 0; i < cache->num_mip_levels; i++) { 1467 + struct svga3dsurface_mip *mip = &cache->mip[i]; 1468 + 1469 + mip->size = svga3dsurface_get_mip_size(*size, i); 1470 + mip->bytes = svga3dsurface_get_image_buffer_size 1471 + (desc, &mip->size, 0); 1472 + mip->row_stride = 1473 + __KERNEL_DIV_ROUND_UP(mip->size.width, 1474 + desc->block_size.width) * 1475 + desc->bytes_per_block * num_samples; 1476 + if (!mip->row_stride) 1477 + goto invalid_dim; 1478 + 1479 + mip->img_stride 
= 1480 + __KERNEL_DIV_ROUND_UP(mip->size.height, 1481 + desc->block_size.height) * 1482 + mip->row_stride; 1483 + if (!mip->img_stride) 1484 + goto invalid_dim; 1485 + 1486 + cache->mip_chain_bytes += mip->bytes; 1487 + } 1488 + cache->sheet_bytes = cache->mip_chain_bytes * num_layers; 1489 + if (!cache->sheet_bytes) 1490 + goto invalid_dim; 1491 + 1492 + return 0; 1493 + 1494 + invalid_dim: 1495 + VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n"); 1496 + return -EINVAL; 1497 + } 1498 + 1499 + /** 1500 + * svga3dsurface_get_loc - Get a surface location from an offset into the 1501 + * backing store 1502 + * @cache: Surface layout data. 1503 + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. 1504 + * @offset: Offset into the surface backing store. 1505 + */ 1506 + static inline void 1507 + svga3dsurface_get_loc(const struct svga3dsurface_cache *cache, 1508 + struct svga3dsurface_loc *loc, 1509 + size_t offset) 1510 + { 1511 + const struct svga3dsurface_mip *mip = &cache->mip[0]; 1512 + const struct svga3d_surface_desc *desc = cache->desc; 1513 + u32 layer; 1514 + int i; 1515 + 1516 + if (offset >= cache->sheet_bytes) 1517 + offset %= cache->sheet_bytes; 1518 + 1519 + layer = offset / cache->mip_chain_bytes; 1520 + offset -= layer * cache->mip_chain_bytes; 1521 + for (i = 0; i < cache->num_mip_levels; ++i, ++mip) { 1522 + if (mip->bytes > offset) 1523 + break; 1524 + offset -= mip->bytes; 1525 + } 1526 + 1527 + loc->sub_resource = svga3dsurface_subres(cache, i, layer); 1528 + loc->z = offset / mip->img_stride; 1529 + offset -= loc->z * mip->img_stride; 1530 + loc->z *= desc->block_size.depth; 1531 + loc->y = offset / mip->row_stride; 1532 + offset -= loc->y * mip->row_stride; 1533 + loc->y *= desc->block_size.height; 1534 + loc->x = offset / desc->bytes_per_block; 1535 + loc->x *= desc->block_size.width; 1536 + } 1537 + 1538 + /** 1539 + * svga3dsurface_inc_loc - Clamp increment a surface location with one block 1540 + * size 1541 + * in 
each dimension. 1542 + * @loc: Pointer to a struct svga3dsurface_loc to be incremented. 1543 + * 1544 + * When computing the size of a range as size = end - start, the range does not 1545 + * include the end element. However a location representing the last byte 1546 + * of a touched region in the backing store *is* included in the range. 1547 + * This function modifies such a location to match the end definition 1548 + * given as start + size which is the one used in a SVGA3dBox. 1549 + */ 1550 + static inline void 1551 + svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache, 1552 + struct svga3dsurface_loc *loc) 1553 + { 1554 + const struct svga3d_surface_desc *desc = cache->desc; 1555 + u32 mip = loc->sub_resource % cache->num_mip_levels; 1556 + const struct drm_vmw_size *size = &cache->mip[mip].size; 1557 + 1558 + loc->sub_resource++; 1559 + loc->x += desc->block_size.width; 1560 + if (loc->x > size->width) 1561 + loc->x = size->width; 1562 + loc->y += desc->block_size.height; 1563 + if (loc->y > size->height) 1564 + loc->y = size->height; 1565 + loc->z += desc->block_size.depth; 1566 + if (loc->z > size->depth) 1567 + loc->z = size->depth; 1568 + } 1569 + 1570 + /** 1571 + * svga3dsurface_min_loc - The start location in a subresource 1572 + * @cache: Surface layout data. 1573 + * @sub_resource: The subresource. 1574 + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. 1575 + */ 1576 + static inline void 1577 + svga3dsurface_min_loc(const struct svga3dsurface_cache *cache, 1578 + u32 sub_resource, 1579 + struct svga3dsurface_loc *loc) 1580 + { 1581 + loc->sub_resource = sub_resource; 1582 + loc->x = loc->y = loc->z = 0; 1583 + } 1584 + 1585 + /** 1586 + * svga3dsurface_max_loc - The end location in a subresource 1587 + * @cache: Surface layout data. 1588 + * @sub_resource: The subresource. 1589 + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. 
1590 + * 1591 + * Following the end definition given in svga3dsurface_inc_loc(), 1592 + * Compute the end location of a surface subresource. 1593 + */ 1594 + static inline void 1595 + svga3dsurface_max_loc(const struct svga3dsurface_cache *cache, 1596 + u32 sub_resource, 1597 + struct svga3dsurface_loc *loc) 1598 + { 1599 + const struct drm_vmw_size *size; 1600 + u32 mip; 1601 + 1602 + loc->sub_resource = sub_resource + 1; 1603 + mip = sub_resource % cache->num_mip_levels; 1604 + size = &cache->mip[mip].size; 1605 + loc->x = size->width; 1606 + loc->y = size->height; 1607 + loc->z = size->depth; 1375 1608 } 1376 1609 1377 1610 #endif /* _SVGA3D_SURFACEDEFS_H_ */
+7 -3
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
··· 462 462 { 463 463 struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); 464 464 465 + WARN_ON(vmw_bo->dirty); 466 + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); 465 467 vmw_bo_unmap(vmw_bo); 466 468 kfree(vmw_bo); 467 469 } ··· 477 475 static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) 478 476 { 479 477 struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo); 478 + struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; 480 479 481 - vmw_bo_unmap(&vmw_user_bo->vbo); 480 + WARN_ON(vbo->dirty); 481 + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); 482 + vmw_bo_unmap(vbo); 482 483 ttm_prime_object_kfree(vmw_user_bo, prime); 483 484 } 484 485 ··· 516 511 memset(vmw_bo, 0, sizeof(*vmw_bo)); 517 512 BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); 518 513 vmw_bo->base.priority = 3; 519 - 520 - INIT_LIST_HEAD(&vmw_bo->res_list); 514 + vmw_bo->res_tree = RB_ROOT; 521 515 522 516 ret = ttm_bo_init(bdev, &vmw_bo->base, size, 523 517 ttm_bo_type_device, placement,
+33 -11
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
··· 56 56 57 57 58 58 #define VMWGFX_DRIVER_NAME "vmwgfx" 59 - #define VMWGFX_DRIVER_DATE "20180704" 59 + #define VMWGFX_DRIVER_DATE "20190328" 60 60 #define VMWGFX_DRIVER_MAJOR 2 61 - #define VMWGFX_DRIVER_MINOR 15 61 + #define VMWGFX_DRIVER_MINOR 16 62 62 #define VMWGFX_DRIVER_PATCHLEVEL 0 63 63 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) 64 64 #define VMWGFX_MAX_RELOCATIONS 2048 ··· 100 100 /** 101 101 * struct vmw_buffer_object - TTM buffer object with vmwgfx additions 102 102 * @base: The TTM buffer object 103 - * @res_list: List of resources using this buffer object as a backing MOB 103 + * @res_tree: RB tree of resources using this buffer object as a backing MOB 104 104 * @pin_count: pin depth 105 105 * @cpu_writers: Number of synccpu write grabs. Protected by reservation when 106 106 * increased. May be decreased without reservation. 107 107 * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB 108 108 * @map: Kmap object for semi-persistent mappings 109 109 * @res_prios: Eviction priority counts for attached resources 110 + * @dirty: structure for user-space dirty-tracking 110 111 */ 111 112 struct vmw_buffer_object { 112 113 struct ttm_buffer_object base; 113 - struct list_head res_list; 114 + struct rb_root res_tree; 114 115 s32 pin_count; 115 116 atomic_t cpu_writers; 116 117 /* Not ref-counted. Protected by binding_mutex */ ··· 119 118 /* Protected by reservation */ 120 119 struct ttm_bo_kmap_obj map; 121 120 u32 res_prios[TTM_MAX_BO_PRIORITY]; 121 + struct vmw_bo_dirty *dirty; 122 122 }; 123 123 124 124 /** ··· 150 148 * @res_dirty: Resource contains data not yet in the backup buffer. Protected 151 149 * by resource reserved. 152 150 * @backup_dirty: Backup buffer contains data not yet in the HW resource. 153 - * Protecte by resource reserved. 151 + * Protected by resource reserved. 152 + * @coherent: Emulate coherency by tracking vm accesses. 154 153 * @backup: The backup buffer if any. Protected by resource reserved. 
155 154 * @backup_offset: Offset into the backup buffer if any. Protected by resource 156 155 * reserved. Note that only a few resource types can have a @backup_offset ··· 160 157 * pin-count greater than zero. It is not on the resource LRU lists and its 161 158 * backup buffer is pinned. Hence it can't be evicted. 162 159 * @func: Method vtable for this resource. Immutable. 160 + * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved. 163 161 * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. 164 - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. 165 162 * @binding_head: List head for the context binding list. Protected by 166 163 * the @dev_priv::binding_mutex 167 164 * @res_free: The resource destructor. 168 165 * @hw_destroy: Callback to destroy the resource on the device, as part of 169 166 * resource destruction. 170 167 */ 168 + struct vmw_resource_dirty; 171 169 struct vmw_resource { 172 170 struct kref kref; 173 171 struct vmw_private *dev_priv; 174 172 int id; 175 173 u32 used_prio; 176 174 unsigned long backup_size; 177 - bool res_dirty; 178 - bool backup_dirty; 175 + u32 res_dirty : 1; 176 + u32 backup_dirty : 1; 177 + u32 coherent : 1; 179 178 struct vmw_buffer_object *backup; 180 179 unsigned long backup_offset; 181 180 unsigned long pin_count; 182 181 const struct vmw_res_func *func; 182 + struct rb_node mob_node; 183 183 struct list_head lru_head; 184 - struct list_head mob_head; 185 184 struct list_head binding_head; 185 + struct vmw_resource_dirty *dirty; 186 186 void (*res_free) (struct vmw_resource *res); 187 187 void (*hw_destroy) (struct vmw_resource *res); 188 188 }; ··· 684 678 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); 685 679 extern struct vmw_resource * 686 680 vmw_resource_reference_unless_doomed(struct vmw_resource *res); 687 - extern int vmw_resource_validate(struct vmw_resource *res, bool intr); 681 + extern int 
vmw_resource_validate(struct vmw_resource *res, bool intr, 682 + bool dirtying); 688 683 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, 689 684 bool no_backup); 690 685 extern bool vmw_resource_needs_backup(const struct vmw_resource *res); ··· 727 720 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); 728 721 void vmw_resource_mob_attach(struct vmw_resource *res); 729 722 void vmw_resource_mob_detach(struct vmw_resource *res); 723 + void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, 724 + pgoff_t end); 725 + int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, 726 + pgoff_t end, pgoff_t *num_prefault); 730 727 731 728 /** 732 729 * vmw_resource_mob_attached - Whether a resource currently has a mob attached ··· 740 729 */ 741 730 static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) 742 731 { 743 - return !list_empty(&res->mob_head); 732 + return !RB_EMPTY_NODE(&res->mob_node); 744 733 } 745 734 746 735 /** ··· 1417 1406 */ 1418 1407 #define VMW_DEBUG_USER(fmt, ...) \ 1419 1408 DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) 1409 + 1410 + /* Resource dirtying - vmwgfx_page_dirty.c */ 1411 + void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); 1412 + int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); 1413 + void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); 1414 + void vmw_bo_dirty_clear_res(struct vmw_resource *res); 1415 + void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); 1416 + void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, 1417 + pgoff_t start, pgoff_t end); 1418 + vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); 1419 + vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); 1420 1420 1421 1421 /** 1422 1422 * VMW_DEBUG_KMS - Debug output for kernel mode-setting
-1
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
··· 2560 2560 offsetof(typeof(*cmd), sid)); 2561 2561 2562 2562 cmd = container_of(header, typeof(*cmd), header); 2563 - 2564 2563 return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, 2565 2564 VMW_RES_DIRTY_NONE, user_surface_converter, 2566 2565 &cmd->sid, NULL);
+488
drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /************************************************************************** 3 + * 4 + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA 5 + * 6 + * Permission is hereby granted, free of charge, to any person obtaining a 7 + * copy of this software and associated documentation files (the 8 + * "Software"), to deal in the Software without restriction, including 9 + * without limitation the rights to use, copy, modify, merge, publish, 10 + * distribute, sub license, and/or sell copies of the Software, and to 11 + * permit persons to whom the Software is furnished to do so, subject to 12 + * the following conditions: 13 + * 14 + * The above copyright notice and this permission notice (including the 15 + * next paragraph) shall be included in all copies or substantial portions 16 + * of the Software. 17 + * 18 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 + * 26 + **************************************************************************/ 27 + #include "vmwgfx_drv.h" 28 + 29 + /* 30 + * Different methods for tracking dirty: 31 + * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits 32 + * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write- 33 + * accesses in the VM mkwrite() callback 34 + */ 35 + enum vmw_bo_dirty_method { 36 + VMW_BO_DIRTY_PAGETABLE, 37 + VMW_BO_DIRTY_MKWRITE, 38 + }; 39 + 40 + /* 41 + * No dirtied pages at scan trigger a transition to the _MKWRITE method, 42 + * similarly a certain percentage of dirty pages trigger a transition to 43 + * the _PAGETABLE method. How many triggers should we wait for before 44 + * changing method? 45 + */ 46 + #define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2 47 + 48 + /* Percentage to trigger a transition to the _PAGETABLE method */ 49 + #define VMW_DIRTY_PERCENTAGE 10 50 + 51 + /** 52 + * struct vmw_bo_dirty - Dirty information for buffer objects 53 + * @start: First currently dirty bit 54 + * @end: Last currently dirty bit + 1 55 + * @method: The currently used dirty method 56 + * @change_count: Number of consecutive method change triggers 57 + * @ref_count: Reference count for this structure 58 + * @bitmap_size: The size of the bitmap in bits. Typically equal to the 59 + * number of pages in the bo. 60 + * @size: The accounting size for this struct. 61 + * @bitmap: A bitmap where each bit represents a page. A set bit means a 62 + * dirty page. 63 + */ 64 + struct vmw_bo_dirty { 65 + unsigned long start; 66 + unsigned long end; 67 + enum vmw_bo_dirty_method method; 68 + unsigned int change_count; 69 + unsigned int ref_count; 70 + unsigned long bitmap_size; 71 + size_t size; 72 + unsigned long bitmap[0]; 73 + }; 74 + 75 + /** 76 + * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits 77 + * @vbo: The buffer object to scan 78 + * 79 + * Scans the pagetable for dirty bits. 
Clear those bits and modify the 80 + * dirty structure with the results. This function may change the 81 + * dirty-tracking method. 82 + */ 83 + static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo) 84 + { 85 + struct vmw_bo_dirty *dirty = vbo->dirty; 86 + pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node); 87 + struct address_space *mapping = vbo->base.bdev->dev_mapping; 88 + pgoff_t num_marked; 89 + 90 + num_marked = clean_record_shared_mapping_range 91 + (mapping, 92 + offset, dirty->bitmap_size, 93 + offset, &dirty->bitmap[0], 94 + &dirty->start, &dirty->end); 95 + if (num_marked == 0) 96 + dirty->change_count++; 97 + else 98 + dirty->change_count = 0; 99 + 100 + if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { 101 + dirty->change_count = 0; 102 + dirty->method = VMW_BO_DIRTY_MKWRITE; 103 + wp_shared_mapping_range(mapping, 104 + offset, dirty->bitmap_size); 105 + clean_record_shared_mapping_range(mapping, 106 + offset, dirty->bitmap_size, 107 + offset, &dirty->bitmap[0], 108 + &dirty->start, &dirty->end); 109 + } 110 + } 111 + 112 + /** 113 + * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method 114 + * @vbo: The buffer object to scan 115 + * 116 + * Write-protect pages written to so that consecutive write accesses will 117 + * trigger a call to mkwrite. 118 + * 119 + * This function may change the dirty-tracking method. 
120 + */ 121 + static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) 122 + { 123 + struct vmw_bo_dirty *dirty = vbo->dirty; 124 + unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 125 + struct address_space *mapping = vbo->base.bdev->dev_mapping; 126 + pgoff_t num_marked; 127 + 128 + if (dirty->end <= dirty->start) 129 + return; 130 + 131 + num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping, 132 + dirty->start + offset, 133 + dirty->end - dirty->start); 134 + 135 + if (100UL * num_marked / dirty->bitmap_size > 136 + VMW_DIRTY_PERCENTAGE) { 137 + dirty->change_count++; 138 + } else { 139 + dirty->change_count = 0; 140 + } 141 + 142 + if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { 143 + pgoff_t start = 0; 144 + pgoff_t end = dirty->bitmap_size; 145 + 146 + dirty->method = VMW_BO_DIRTY_PAGETABLE; 147 + clean_record_shared_mapping_range(mapping, offset, end, offset, 148 + &dirty->bitmap[0], 149 + &start, &end); 150 + bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size); 151 + if (dirty->start < dirty->end) 152 + bitmap_set(&dirty->bitmap[0], dirty->start, 153 + dirty->end - dirty->start); 154 + dirty->change_count = 0; 155 + } 156 + } 157 + 158 + /** 159 + * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty 160 + * tracking structure 161 + * @vbo: The buffer object to scan 162 + * 163 + * This function may change the dirty tracking method. 164 + */ 165 + void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) 166 + { 167 + struct vmw_bo_dirty *dirty = vbo->dirty; 168 + 169 + if (dirty->method == VMW_BO_DIRTY_PAGETABLE) 170 + vmw_bo_dirty_scan_pagetable(vbo); 171 + else 172 + vmw_bo_dirty_scan_mkwrite(vbo); 173 + } 174 + 175 + /** 176 + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before 177 + * an unmap_mapping_range operation. 178 + * @vbo: The buffer object, 179 + * @start: First page of the range within the buffer object. 
180 + * @end: Last page of the range within the buffer object + 1. 181 + * 182 + * If we're using the _PAGETABLE scan method, we may leak dirty pages 183 + * when calling unmap_mapping_range(). This function makes sure we pick 184 + * up all dirty pages. 185 + */ 186 + static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, 187 + pgoff_t start, pgoff_t end) 188 + { 189 + struct vmw_bo_dirty *dirty = vbo->dirty; 190 + unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 191 + struct address_space *mapping = vbo->base.bdev->dev_mapping; 192 + 193 + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) 194 + return; 195 + 196 + wp_shared_mapping_range(mapping, start + offset, end - start); 197 + clean_record_shared_mapping_range(mapping, start + offset, 198 + end - start, offset, 199 + &dirty->bitmap[0], &dirty->start, 200 + &dirty->end); 201 + } 202 + 203 + /** 204 + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo 205 + * @vbo: The buffer object, 206 + * @start: First page of the range within the buffer object. 207 + * @end: Last page of the range within the buffer object + 1. 208 + * 209 + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. 210 + */ 211 + void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, 212 + pgoff_t start, pgoff_t end) 213 + { 214 + unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 215 + struct address_space *mapping = vbo->base.bdev->dev_mapping; 216 + 217 + vmw_bo_dirty_pre_unmap(vbo, start, end); 218 + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, 219 + (loff_t) (end - start) << PAGE_SHIFT); 220 + } 221 + 222 + /** 223 + * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object 224 + * @vbo: The buffer object 225 + * 226 + * This function registers a dirty-tracking user to a buffer object. 227 + * A user can be for example a resource or a vma in a special user-space 228 + * mapping. 
229 + * 230 + * Return: Zero on success, -ENOMEM on memory allocation failure. 231 + */ 232 + int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) 233 + { 234 + struct vmw_bo_dirty *dirty = vbo->dirty; 235 + pgoff_t num_pages = vbo->base.num_pages; 236 + size_t size, acc_size; 237 + int ret; 238 + static struct ttm_operation_ctx ctx = { 239 + .interruptible = false, 240 + .no_wait_gpu = false 241 + }; 242 + 243 + if (dirty) { 244 + dirty->ref_count++; 245 + return 0; 246 + } 247 + 248 + size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long); 249 + acc_size = ttm_round_pot(size); 250 + ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); 251 + if (ret) { 252 + VMW_DEBUG_USER("Out of graphics memory for buffer object " 253 + "dirty tracker.\n"); 254 + return ret; 255 + } 256 + dirty = kvzalloc(size, GFP_KERNEL); 257 + if (!dirty) { 258 + ret = -ENOMEM; 259 + goto out_no_dirty; 260 + } 261 + 262 + dirty->size = acc_size; 263 + dirty->bitmap_size = num_pages; 264 + dirty->start = dirty->bitmap_size; 265 + dirty->end = 0; 266 + dirty->ref_count = 1; 267 + if (num_pages < PAGE_SIZE / sizeof(pte_t)) { 268 + dirty->method = VMW_BO_DIRTY_PAGETABLE; 269 + } else { 270 + struct address_space *mapping = vbo->base.bdev->dev_mapping; 271 + pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node); 272 + 273 + dirty->method = VMW_BO_DIRTY_MKWRITE; 274 + 275 + /* Write-protect and then pick up already dirty bits */ 276 + wp_shared_mapping_range(mapping, offset, num_pages); 277 + clean_record_shared_mapping_range(mapping, offset, num_pages, 278 + offset, 279 + &dirty->bitmap[0], 280 + &dirty->start, &dirty->end); 281 + } 282 + 283 + vbo->dirty = dirty; 284 + 285 + return 0; 286 + 287 + out_no_dirty: 288 + ttm_mem_global_free(&ttm_mem_glob, acc_size); 289 + return ret; 290 + } 291 + 292 + /** 293 + * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object 294 + * @vbo: The buffer object 295 + * 296 + * This function releases a dirty-tracking user 
from a buffer object. 297 + * If the reference count reaches zero, then the dirty-tracking object is 298 + * freed and the pointer to it cleared. 299 + * 300 + * Return: Zero on success, -ENOMEM on memory allocation failure. 301 + */ 302 + void vmw_bo_dirty_release(struct vmw_buffer_object *vbo) 303 + { 304 + struct vmw_bo_dirty *dirty = vbo->dirty; 305 + 306 + if (dirty && --dirty->ref_count == 0) { 307 + size_t acc_size = dirty->size; 308 + 309 + kvfree(dirty); 310 + ttm_mem_global_free(&ttm_mem_glob, acc_size); 311 + vbo->dirty = NULL; 312 + } 313 + } 314 + 315 + /** 316 + * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from 317 + * its backing mob. 318 + * @res: The resource 319 + * 320 + * This function will pick up all dirty ranges affecting the resource from 321 + * it's backup mob, and call vmw_resource_dirty_update() once for each 322 + * range. The transferred ranges will be cleared from the backing mob's 323 + * dirty tracking. 324 + */ 325 + void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res) 326 + { 327 + struct vmw_buffer_object *vbo = res->backup; 328 + struct vmw_bo_dirty *dirty = vbo->dirty; 329 + pgoff_t start, cur, end; 330 + unsigned long res_start = res->backup_offset; 331 + unsigned long res_end = res->backup_offset + res->backup_size; 332 + 333 + WARN_ON_ONCE(res_start & ~PAGE_MASK); 334 + res_start >>= PAGE_SHIFT; 335 + res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); 336 + 337 + if (res_start >= dirty->end || res_end <= dirty->start) 338 + return; 339 + 340 + cur = max(res_start, dirty->start); 341 + res_end = max(res_end, dirty->end); 342 + while (cur < res_end) { 343 + unsigned long num; 344 + 345 + start = find_next_bit(&dirty->bitmap[0], res_end, cur); 346 + if (start >= res_end) 347 + break; 348 + 349 + end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1); 350 + cur = end + 1; 351 + num = end - start; 352 + bitmap_clear(&dirty->bitmap[0], start, num); 353 + vmw_resource_dirty_update(res, start, end); 
354 + } 355 + 356 + if (res_start <= dirty->start && res_end > dirty->start) 357 + dirty->start = res_end; 358 + if (res_start < dirty->end && res_end >= dirty->end) 359 + dirty->end = res_start; 360 + } 361 + 362 + /** 363 + * vmw_bo_dirty_clear_res - Clear a resource's dirty region from 364 + * its backing mob. 365 + * @res: The resource 366 + * 367 + * This function will clear all dirty ranges affecting the resource from 368 + * it's backup mob's dirty tracking. 369 + */ 370 + void vmw_bo_dirty_clear_res(struct vmw_resource *res) 371 + { 372 + unsigned long res_start = res->backup_offset; 373 + unsigned long res_end = res->backup_offset + res->backup_size; 374 + struct vmw_buffer_object *vbo = res->backup; 375 + struct vmw_bo_dirty *dirty = vbo->dirty; 376 + 377 + res_start >>= PAGE_SHIFT; 378 + res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); 379 + 380 + if (res_start >= dirty->end || res_end <= dirty->start) 381 + return; 382 + 383 + res_start = max(res_start, dirty->start); 384 + res_end = min(res_end, dirty->end); 385 + bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start); 386 + 387 + if (res_start <= dirty->start && res_end > dirty->start) 388 + dirty->start = res_end; 389 + if (res_start < dirty->end && res_end >= dirty->end) 390 + dirty->end = res_start; 391 + } 392 + 393 + vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf) 394 + { 395 + struct vm_area_struct *vma = vmf->vma; 396 + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 397 + vma->vm_private_data; 398 + vm_fault_t ret; 399 + unsigned long page_offset; 400 + unsigned int save_flags; 401 + struct vmw_buffer_object *vbo = 402 + container_of(bo, typeof(*vbo), base); 403 + 404 + /* 405 + * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly. 406 + * So make sure the TTM helpers are aware. 
407 + */ 408 + save_flags = vmf->flags; 409 + vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY; 410 + ret = ttm_bo_vm_reserve(bo, vmf); 411 + vmf->flags = save_flags; 412 + if (ret) 413 + return ret; 414 + 415 + page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node); 416 + if (unlikely(page_offset >= bo->num_pages)) { 417 + ret = VM_FAULT_SIGBUS; 418 + goto out_unlock; 419 + } 420 + 421 + if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE && 422 + !test_bit(page_offset, &vbo->dirty->bitmap[0])) { 423 + struct vmw_bo_dirty *dirty = vbo->dirty; 424 + 425 + __set_bit(page_offset, &dirty->bitmap[0]); 426 + dirty->start = min(dirty->start, page_offset); 427 + dirty->end = max(dirty->end, page_offset + 1); 428 + } 429 + 430 + out_unlock: 431 + dma_resv_unlock(bo->base.resv); 432 + return ret; 433 + } 434 + 435 + vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) 436 + { 437 + struct vm_area_struct *vma = vmf->vma; 438 + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 439 + vma->vm_private_data; 440 + struct vmw_buffer_object *vbo = 441 + container_of(bo, struct vmw_buffer_object, base); 442 + pgoff_t num_prefault; 443 + pgprot_t prot; 444 + vm_fault_t ret; 445 + 446 + ret = ttm_bo_vm_reserve(bo, vmf); 447 + if (ret) 448 + return ret; 449 + 450 + num_prefault = (vma->vm_flags & VM_RAND_READ) ? 
1 : 451 + TTM_BO_VM_NUM_PREFAULT; 452 + 453 + if (vbo->dirty) { 454 + pgoff_t allowed_prefault; 455 + unsigned long page_offset; 456 + 457 + page_offset = vmf->pgoff - 458 + drm_vma_node_start(&bo->base.vma_node); 459 + if (page_offset >= bo->num_pages || 460 + vmw_resources_clean(vbo, page_offset, 461 + page_offset + PAGE_SIZE, 462 + &allowed_prefault)) { 463 + ret = VM_FAULT_SIGBUS; 464 + goto out_unlock; 465 + } 466 + 467 + num_prefault = min(num_prefault, allowed_prefault); 468 + } 469 + 470 + /* 471 + * If we don't track dirty using the MKWRITE method, make sure 472 + * sure the page protection is write-enabled so we don't get 473 + * a lot of unnecessary write faults. 474 + */ 475 + if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE) 476 + prot = vma->vm_page_prot; 477 + else 478 + prot = vm_get_page_prot(vma->vm_flags); 479 + 480 + ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault); 481 + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 482 + return ret; 483 + 484 + out_unlock: 485 + dma_resv_unlock(bo->base.resv); 486 + 487 + return ret; 488 + }
+180 -13
drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
··· 40 40 void vmw_resource_mob_attach(struct vmw_resource *res) 41 41 { 42 42 struct vmw_buffer_object *backup = res->backup; 43 + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; 43 44 44 45 dma_resv_assert_held(res->backup->base.base.resv); 45 46 res->used_prio = (res->res_dirty) ? res->func->dirty_prio : 46 47 res->func->prio; 47 - list_add_tail(&res->mob_head, &backup->res_list); 48 + 49 + while (*new) { 50 + struct vmw_resource *this = 51 + container_of(*new, struct vmw_resource, mob_node); 52 + 53 + parent = *new; 54 + new = (res->backup_offset < this->backup_offset) ? 55 + &((*new)->rb_left) : &((*new)->rb_right); 56 + } 57 + 58 + rb_link_node(&res->mob_node, parent, new); 59 + rb_insert_color(&res->mob_node, &backup->res_tree); 60 + 48 61 vmw_bo_prio_add(backup, res->used_prio); 49 62 } 50 63 ··· 71 58 72 59 dma_resv_assert_held(backup->base.base.resv); 73 60 if (vmw_resource_mob_attached(res)) { 74 - list_del_init(&res->mob_head); 61 + rb_erase(&res->mob_node, &backup->res_tree); 62 + RB_CLEAR_NODE(&res->mob_node); 75 63 vmw_bo_prio_del(backup, res->used_prio); 76 64 } 77 65 } ··· 133 119 } 134 120 res->backup_dirty = false; 135 121 vmw_resource_mob_detach(res); 122 + if (res->dirty) 123 + res->func->dirty_free(res); 124 + if (res->coherent) 125 + vmw_bo_dirty_release(res->backup); 136 126 ttm_bo_unreserve(bo); 137 127 vmw_bo_unreference(&res->backup); 138 128 } ··· 218 200 res->res_free = res_free; 219 201 res->dev_priv = dev_priv; 220 202 res->func = func; 203 + RB_CLEAR_NODE(&res->mob_node); 221 204 INIT_LIST_HEAD(&res->lru_head); 222 - INIT_LIST_HEAD(&res->mob_head); 223 205 INIT_LIST_HEAD(&res->binding_head); 224 206 res->id = -1; 225 207 res->backup = NULL; 226 208 res->backup_offset = 0; 227 209 res->backup_dirty = false; 228 210 res->res_dirty = false; 211 + res->coherent = false; 229 212 res->used_prio = 3; 213 + res->dirty = NULL; 230 214 if (delay_id) 231 215 return 0; 232 216 else ··· 393 373 * should be retried once resources 
have been freed up. 394 374 */ 395 375 static int vmw_resource_do_validate(struct vmw_resource *res, 396 - struct ttm_validate_buffer *val_buf) 376 + struct ttm_validate_buffer *val_buf, 377 + bool dirtying) 397 378 { 398 379 int ret = 0; 399 380 const struct vmw_res_func *func = res->func; ··· 414 393 goto out_bind_failed; 415 394 if (func->needs_backup) 416 395 vmw_resource_mob_attach(res); 396 + } 397 + 398 + /* 399 + * Handle the case where the backup mob is marked coherent but 400 + * the resource isn't. 401 + */ 402 + if (func->dirty_alloc && vmw_resource_mob_attached(res) && 403 + !res->coherent) { 404 + if (res->backup->dirty && !res->dirty) { 405 + ret = func->dirty_alloc(res); 406 + if (ret) 407 + return ret; 408 + } else if (!res->backup->dirty && res->dirty) { 409 + func->dirty_free(res); 410 + } 411 + } 412 + 413 + /* 414 + * Transfer the dirty regions to the resource and update 415 + * the resource. 416 + */ 417 + if (res->dirty) { 418 + if (dirtying && !res->res_dirty) { 419 + pgoff_t start = res->backup_offset >> PAGE_SHIFT; 420 + pgoff_t end = __KERNEL_DIV_ROUND_UP 421 + (res->backup_offset + res->backup_size, 422 + PAGE_SIZE); 423 + 424 + vmw_bo_dirty_unmap(res->backup, start, end); 425 + } 426 + 427 + vmw_bo_dirty_transfer_to_res(res); 428 + return func->dirty_sync(res); 417 429 } 418 430 419 431 return 0; ··· 487 433 if (switch_backup && new_backup != res->backup) { 488 434 if (res->backup) { 489 435 vmw_resource_mob_detach(res); 436 + if (res->coherent) 437 + vmw_bo_dirty_release(res->backup); 490 438 vmw_bo_unreference(&res->backup); 491 439 } 492 440 493 441 if (new_backup) { 494 442 res->backup = vmw_bo_reference(new_backup); 443 + 444 + /* 445 + * The validation code should already have added a 446 + * dirty tracker here. 
447 + */ 448 + WARN_ON(res->coherent && !new_backup->dirty); 449 + 495 450 vmw_resource_mob_attach(res); 496 451 } else { 497 452 res->backup = NULL; 498 453 } 454 + } else if (switch_backup && res->coherent) { 455 + vmw_bo_dirty_release(res->backup); 499 456 } 457 + 500 458 if (switch_backup) 501 459 res->backup_offset = new_backup_offset; 502 460 ··· 688 622 * to the device. 689 623 * @res: The resource to make visible to the device. 690 624 * @intr: Perform waits interruptible if possible. 625 + * @dirtying: Pending GPU operation will dirty the resource 691 626 * 692 627 * On succesful return, any backup DMA buffer pointed to by @res->backup will 693 628 * be reserved and validated. ··· 698 631 * Return: Zero on success, -ERESTARTSYS if interrupted, negative error code 699 632 * on failure. 700 633 */ 701 - int vmw_resource_validate(struct vmw_resource *res, bool intr) 634 + int vmw_resource_validate(struct vmw_resource *res, bool intr, 635 + bool dirtying) 702 636 { 703 637 int ret; 704 638 struct vmw_resource *evict_res; ··· 716 648 if (res->backup) 717 649 val_buf.bo = &res->backup->base; 718 650 do { 719 - ret = vmw_resource_do_validate(res, &val_buf); 651 + ret = vmw_resource_do_validate(res, &val_buf, dirtying); 720 652 if (likely(ret != -EBUSY)) 721 653 break; 722 654 ··· 779 711 */ 780 712 void vmw_resource_unbind_list(struct vmw_buffer_object *vbo) 781 713 { 782 - 783 - struct vmw_resource *res, *next; 784 714 struct ttm_validate_buffer val_buf = { 785 715 .bo = &vbo->base, 786 716 .num_shared = 0 787 717 }; 788 718 789 719 dma_resv_assert_held(vbo->base.base.resv); 790 - list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) { 791 - if (!res->func->unbind) 792 - continue; 720 + while (!RB_EMPTY_ROOT(&vbo->res_tree)) { 721 + struct rb_node *node = vbo->res_tree.rb_node; 722 + struct vmw_resource *res = 723 + container_of(node, struct vmw_resource, mob_node); 793 724 794 - (void) res->func->unbind(res, res->res_dirty, &val_buf); 725 + if 
(!WARN_ON_ONCE(!res->func->unbind)) 726 + (void) res->func->unbind(res, res->res_dirty, &val_buf); 727 + 795 728 res->backup_dirty = true; 796 729 res->res_dirty = false; 797 730 vmw_resource_mob_detach(res); ··· 1016 947 /* Do we really need to pin the MOB as well? */ 1017 948 vmw_bo_pin_reserved(vbo, true); 1018 949 } 1019 - ret = vmw_resource_validate(res, interruptible); 950 + ret = vmw_resource_validate(res, interruptible, true); 1020 951 if (vbo) 1021 952 ttm_bo_unreserve(&vbo->base); 1022 953 if (ret) ··· 1075 1006 enum vmw_res_type vmw_res_type(const struct vmw_resource *res) 1076 1007 { 1077 1008 return res->func->res_type; 1009 + } 1010 + 1011 + /** 1012 + * vmw_resource_update_dirty - Update a resource's dirty tracker with a 1013 + * sequential range of touched backing store memory. 1014 + * @res: The resource. 1015 + * @start: The first page touched. 1016 + * @end: The last page touched + 1. 1017 + */ 1018 + void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, 1019 + pgoff_t end) 1020 + { 1021 + if (res->dirty) 1022 + res->func->dirty_range_add(res, start << PAGE_SHIFT, 1023 + end << PAGE_SHIFT); 1024 + } 1025 + 1026 + /** 1027 + * vmw_resources_clean - Clean resources intersecting a mob range 1028 + * @vbo: The mob buffer object 1029 + * @start: The mob page offset starting the range 1030 + * @end: The mob page offset ending the range 1031 + * @num_prefault: Returns how many pages including the first have been 1032 + * cleaned and are ok to prefault 1033 + */ 1034 + int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, 1035 + pgoff_t end, pgoff_t *num_prefault) 1036 + { 1037 + struct rb_node *cur = vbo->res_tree.rb_node; 1038 + struct vmw_resource *found = NULL; 1039 + unsigned long res_start = start << PAGE_SHIFT; 1040 + unsigned long res_end = end << PAGE_SHIFT; 1041 + unsigned long last_cleaned = 0; 1042 + 1043 + /* 1044 + * Find the resource with lowest backup_offset that intersects the 1045 + * range. 
1046 + */ 1047 + while (cur) { 1048 + struct vmw_resource *cur_res = 1049 + container_of(cur, struct vmw_resource, mob_node); 1050 + 1051 + if (cur_res->backup_offset >= res_end) { 1052 + cur = cur->rb_left; 1053 + } else if (cur_res->backup_offset + cur_res->backup_size <= 1054 + res_start) { 1055 + cur = cur->rb_right; 1056 + } else { 1057 + found = cur_res; 1058 + cur = cur->rb_left; 1059 + /* Continue to look for resources with lower offsets */ 1060 + } 1061 + } 1062 + 1063 + /* 1064 + * In order of increasing backup_offset, clean dirty resorces 1065 + * intersecting the range. 1066 + */ 1067 + while (found) { 1068 + if (found->res_dirty) { 1069 + int ret; 1070 + 1071 + if (!found->func->clean) 1072 + return -EINVAL; 1073 + 1074 + ret = found->func->clean(found); 1075 + if (ret) 1076 + return ret; 1077 + 1078 + found->res_dirty = false; 1079 + } 1080 + last_cleaned = found->backup_offset + found->backup_size; 1081 + cur = rb_next(&found->mob_node); 1082 + if (!cur) 1083 + break; 1084 + 1085 + found = container_of(cur, struct vmw_resource, mob_node); 1086 + if (found->backup_offset >= res_end) 1087 + break; 1088 + } 1089 + 1090 + /* 1091 + * Set number of pages allowed prefaulting and fence the buffer object 1092 + */ 1093 + *num_prefault = 1; 1094 + if (last_cleaned > res_start) { 1095 + struct ttm_buffer_object *bo = &vbo->base; 1096 + 1097 + *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start, 1098 + PAGE_SIZE); 1099 + vmw_bo_fence_single(bo, NULL); 1100 + if (bo->moving) 1101 + dma_fence_put(bo->moving); 1102 + bo->moving = dma_fence_get 1103 + (dma_resv_get_excl(bo->base.resv)); 1104 + } 1105 + 1106 + return 0; 1078 1107 }
+13
drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
··· 71 71 * @commit_notify: If the resource is a command buffer managed resource, 72 72 * callback to notify that a define or remove command 73 73 * has been committed to the device. 74 + * @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not 75 + * supported. 76 + * @dirty_free: Free the dirty tracker. 77 + * @dirty_sync: Upload the dirty mob contents to the resource. 78 + * @dirty_add_range: Add a sequential dirty range to the resource 79 + * dirty tracker. 80 + * @clean: Clean the resource. 74 81 */ 75 82 struct vmw_res_func { 76 83 enum vmw_res_type res_type; ··· 97 90 struct ttm_validate_buffer *val_buf); 98 91 void (*commit_notify)(struct vmw_resource *res, 99 92 enum vmw_cmdbuf_res_state state); 93 + int (*dirty_alloc)(struct vmw_resource *res); 94 + void (*dirty_free)(struct vmw_resource *res); 95 + int (*dirty_sync)(struct vmw_resource *res); 96 + void (*dirty_range_add)(struct vmw_resource *res, size_t start, 97 + size_t end); 98 + int (*clean)(struct vmw_resource *res); 100 99 }; 101 100 102 101 /**
+392 -3
drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
··· 68 68 uint32_t bo_offset; 69 69 }; 70 70 71 + /** 72 + * vmw_surface_dirty - Surface dirty-tracker 73 + * @cache: Cached layout information of the surface. 74 + * @size: Accounting size for the struct vmw_surface_dirty. 75 + * @num_subres: Number of subresources. 76 + * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource. 77 + */ 78 + struct vmw_surface_dirty { 79 + struct svga3dsurface_cache cache; 80 + size_t size; 81 + u32 num_subres; 82 + SVGA3dBox boxes[0]; 83 + }; 84 + 71 85 static void vmw_user_surface_free(struct vmw_resource *res); 72 86 static struct vmw_resource * 73 87 vmw_user_surface_base_to_res(struct ttm_base_object *base); ··· 109 95 struct drm_vmw_surface_arg *req, 110 96 struct drm_vmw_gb_surface_ref_ext_rep *rep, 111 97 struct drm_file *file_priv); 98 + 99 + static void vmw_surface_dirty_free(struct vmw_resource *res); 100 + static int vmw_surface_dirty_alloc(struct vmw_resource *res); 101 + static int vmw_surface_dirty_sync(struct vmw_resource *res); 102 + static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start, 103 + size_t end); 104 + static int vmw_surface_clean(struct vmw_resource *res); 112 105 113 106 static const struct vmw_user_resource_conv user_surface_conv = { 114 107 .object_type = VMW_RES_SURFACE, ··· 154 133 .create = vmw_gb_surface_create, 155 134 .destroy = vmw_gb_surface_destroy, 156 135 .bind = vmw_gb_surface_bind, 157 - .unbind = vmw_gb_surface_unbind 136 + .unbind = vmw_gb_surface_unbind, 137 + .dirty_alloc = vmw_surface_dirty_alloc, 138 + .dirty_free = vmw_surface_dirty_free, 139 + .dirty_sync = vmw_surface_dirty_sync, 140 + .dirty_range_add = vmw_surface_dirty_range_add, 141 + .clean = vmw_surface_clean, 158 142 }; 159 143 160 144 /** ··· 665 639 struct vmw_private *dev_priv = srf->res.dev_priv; 666 640 uint32_t size = user_srf->size; 667 641 642 + WARN_ON_ONCE(res->dirty); 668 643 if (user_srf->master) 669 644 drm_master_put(&user_srf->master); 670 645 kfree(srf->offsets); 
··· 1193 1166 cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE; 1194 1167 cmd2->header.size = sizeof(cmd2->body); 1195 1168 cmd2->body.sid = res->id; 1196 - res->backup_dirty = false; 1197 1169 } 1198 1170 vmw_fifo_commit(dev_priv, submit_size); 1171 + 1172 + if (res->backup->dirty && res->backup_dirty) { 1173 + /* We've just made a full upload. Cear dirty regions. */ 1174 + vmw_bo_dirty_clear_res(res); 1175 + } 1176 + 1177 + res->backup_dirty = false; 1199 1178 1200 1179 return 0; 1201 1180 } ··· 1667 1634 } 1668 1635 } 1669 1636 } else if (req->base.drm_surface_flags & 1670 - drm_vmw_surface_flag_create_buffer) 1637 + (drm_vmw_surface_flag_create_buffer | 1638 + drm_vmw_surface_flag_coherent)) 1671 1639 ret = vmw_user_bo_alloc(dev_priv, tfile, 1672 1640 res->backup_size, 1673 1641 req->base.drm_surface_flags & ··· 1680 1646 if (unlikely(ret != 0)) { 1681 1647 vmw_resource_unreference(&res); 1682 1648 goto out_unlock; 1649 + } 1650 + 1651 + if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) { 1652 + struct vmw_buffer_object *backup = res->backup; 1653 + 1654 + ttm_bo_reserve(&backup->base, false, false, NULL); 1655 + if (!res->func->dirty_alloc) 1656 + ret = -EINVAL; 1657 + if (!ret) 1658 + ret = vmw_bo_dirty_add(backup); 1659 + if (!ret) { 1660 + res->coherent = true; 1661 + ret = res->func->dirty_alloc(res); 1662 + } 1663 + ttm_bo_unreserve(&backup->base); 1664 + if (ret) { 1665 + vmw_resource_unreference(&res); 1666 + goto out_unlock; 1667 + } 1668 + 1683 1669 } 1684 1670 1685 1671 tmp = vmw_resource_reference(res); ··· 1809 1755 ttm_base_object_unref(&base); 1810 1756 1811 1757 return ret; 1758 + } 1759 + 1760 + /** 1761 + * vmw_subres_dirty_add - Add a dirty region to a subresource 1762 + * @dirty: The surfaces's dirty tracker. 1763 + * @loc_start: The location corresponding to the start of the region. 1764 + * @loc_end: The location corresponding to the end of the region. 
1765 + * 1766 + * As we are assuming that @loc_start and @loc_end represent a sequential 1767 + * range of backing store memory, if the region spans multiple lines then 1768 + * regardless of the x coordinate, the full lines are dirtied. 1769 + * Correspondingly if the region spans multiple z slices, then full rather 1770 + * than partial z slices are dirtied. 1771 + */ 1772 + static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty, 1773 + const struct svga3dsurface_loc *loc_start, 1774 + const struct svga3dsurface_loc *loc_end) 1775 + { 1776 + const struct svga3dsurface_cache *cache = &dirty->cache; 1777 + SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource]; 1778 + u32 mip = loc_start->sub_resource % cache->num_mip_levels; 1779 + const struct drm_vmw_size *size = &cache->mip[mip].size; 1780 + u32 box_c2 = box->z + box->d; 1781 + 1782 + if (WARN_ON(loc_start->sub_resource >= dirty->num_subres)) 1783 + return; 1784 + 1785 + if (box->d == 0 || box->z > loc_start->z) 1786 + box->z = loc_start->z; 1787 + if (box_c2 < loc_end->z) 1788 + box->d = loc_end->z - box->z; 1789 + 1790 + if (loc_start->z + 1 == loc_end->z) { 1791 + box_c2 = box->y + box->h; 1792 + if (box->h == 0 || box->y > loc_start->y) 1793 + box->y = loc_start->y; 1794 + if (box_c2 < loc_end->y) 1795 + box->h = loc_end->y - box->y; 1796 + 1797 + if (loc_start->y + 1 == loc_end->y) { 1798 + box_c2 = box->x + box->w; 1799 + if (box->w == 0 || box->x > loc_start->x) 1800 + box->x = loc_start->x; 1801 + if (box_c2 < loc_end->x) 1802 + box->w = loc_end->x - box->x; 1803 + } else { 1804 + box->x = 0; 1805 + box->w = size->width; 1806 + } 1807 + } else { 1808 + box->y = 0; 1809 + box->h = size->height; 1810 + box->x = 0; 1811 + box->w = size->width; 1812 + } 1813 + } 1814 + 1815 + /** 1816 + * vmw_subres_dirty_full - Mark a full subresource as dirty 1817 + * @dirty: The surface's dirty tracker. 
1818 + * @subres: The subresource 1819 + */ 1820 + static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres) 1821 + { 1822 + const struct svga3dsurface_cache *cache = &dirty->cache; 1823 + u32 mip = subres % cache->num_mip_levels; 1824 + const struct drm_vmw_size *size = &cache->mip[mip].size; 1825 + SVGA3dBox *box = &dirty->boxes[subres]; 1826 + 1827 + box->x = 0; 1828 + box->y = 0; 1829 + box->z = 0; 1830 + box->w = size->width; 1831 + box->h = size->height; 1832 + box->d = size->depth; 1833 + } 1834 + 1835 + /* 1836 + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for texture 1837 + * surfaces. 1838 + */ 1839 + static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res, 1840 + size_t start, size_t end) 1841 + { 1842 + struct vmw_surface_dirty *dirty = 1843 + (struct vmw_surface_dirty *) res->dirty; 1844 + size_t backup_end = res->backup_offset + res->backup_size; 1845 + struct svga3dsurface_loc loc1, loc2; 1846 + const struct svga3dsurface_cache *cache; 1847 + 1848 + start = max_t(size_t, start, res->backup_offset) - res->backup_offset; 1849 + end = min(end, backup_end) - res->backup_offset; 1850 + cache = &dirty->cache; 1851 + svga3dsurface_get_loc(cache, &loc1, start); 1852 + svga3dsurface_get_loc(cache, &loc2, end - 1); 1853 + svga3dsurface_inc_loc(cache, &loc2); 1854 + 1855 + if (loc1.sub_resource + 1 == loc2.sub_resource) { 1856 + /* Dirty range covers a single sub-resource */ 1857 + vmw_subres_dirty_add(dirty, &loc1, &loc2); 1858 + } else { 1859 + /* Dirty range covers multiple sub-resources */ 1860 + struct svga3dsurface_loc loc_min, loc_max; 1861 + u32 sub_res; 1862 + 1863 + svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max); 1864 + vmw_subres_dirty_add(dirty, &loc1, &loc_max); 1865 + svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min); 1866 + vmw_subres_dirty_add(dirty, &loc_min, &loc2); 1867 + for (sub_res = loc1.sub_resource + 1; 1868 + sub_res < loc2.sub_resource - 1; ++sub_res) 1869 + 
vmw_subres_dirty_full(dirty, sub_res); 1870 + } 1871 + } 1872 + 1873 + /* 1874 + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for buffer 1875 + * surfaces. 1876 + */ 1877 + static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res, 1878 + size_t start, size_t end) 1879 + { 1880 + struct vmw_surface_dirty *dirty = 1881 + (struct vmw_surface_dirty *) res->dirty; 1882 + const struct svga3dsurface_cache *cache = &dirty->cache; 1883 + size_t backup_end = res->backup_offset + cache->mip_chain_bytes; 1884 + SVGA3dBox *box = &dirty->boxes[0]; 1885 + u32 box_c2; 1886 + 1887 + box->h = box->d = 1; 1888 + start = max_t(size_t, start, res->backup_offset) - res->backup_offset; 1889 + end = min(end, backup_end) - res->backup_offset; 1890 + box_c2 = box->x + box->w; 1891 + if (box->w == 0 || box->x > start) 1892 + box->x = start; 1893 + if (box_c2 < end) 1894 + box->w = end - box->x; 1895 + } 1896 + 1897 + /* 1898 + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for surfaces 1899 + */ 1900 + static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start, 1901 + size_t end) 1902 + { 1903 + struct vmw_surface *srf = vmw_res_to_srf(res); 1904 + 1905 + if (WARN_ON(end <= res->backup_offset || 1906 + start >= res->backup_offset + res->backup_size)) 1907 + return; 1908 + 1909 + if (srf->format == SVGA3D_BUFFER) 1910 + vmw_surface_buf_dirty_range_add(res, start, end); 1911 + else 1912 + vmw_surface_tex_dirty_range_add(res, start, end); 1913 + } 1914 + 1915 + /* 1916 + * vmw_surface_dirty_sync - The surface's dirty_sync callback. 
1917 + */ 1918 + static int vmw_surface_dirty_sync(struct vmw_resource *res) 1919 + { 1920 + struct vmw_private *dev_priv = res->dev_priv; 1921 + bool has_dx = 0; 1922 + u32 i, num_dirty; 1923 + struct vmw_surface_dirty *dirty = 1924 + (struct vmw_surface_dirty *) res->dirty; 1925 + size_t alloc_size; 1926 + const struct svga3dsurface_cache *cache = &dirty->cache; 1927 + struct { 1928 + SVGA3dCmdHeader header; 1929 + SVGA3dCmdDXUpdateSubResource body; 1930 + } *cmd1; 1931 + struct { 1932 + SVGA3dCmdHeader header; 1933 + SVGA3dCmdUpdateGBImage body; 1934 + } *cmd2; 1935 + void *cmd; 1936 + 1937 + num_dirty = 0; 1938 + for (i = 0; i < dirty->num_subres; ++i) { 1939 + const SVGA3dBox *box = &dirty->boxes[i]; 1940 + 1941 + if (box->d) 1942 + num_dirty++; 1943 + } 1944 + 1945 + if (!num_dirty) 1946 + goto out; 1947 + 1948 + alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2)); 1949 + cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size); 1950 + if (!cmd) 1951 + return -ENOMEM; 1952 + 1953 + cmd1 = cmd; 1954 + cmd2 = cmd; 1955 + 1956 + for (i = 0; i < dirty->num_subres; ++i) { 1957 + const SVGA3dBox *box = &dirty->boxes[i]; 1958 + 1959 + if (!box->d) 1960 + continue; 1961 + 1962 + /* 1963 + * DX_UPDATE_SUBRESOURCE is aware of array surfaces. 1964 + * UPDATE_GB_IMAGE is not. 
1965 + */ 1966 + if (has_dx) { 1967 + cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE; 1968 + cmd1->header.size = sizeof(cmd1->body); 1969 + cmd1->body.sid = res->id; 1970 + cmd1->body.subResource = i; 1971 + cmd1->body.box = *box; 1972 + cmd1++; 1973 + } else { 1974 + cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; 1975 + cmd2->header.size = sizeof(cmd2->body); 1976 + cmd2->body.image.sid = res->id; 1977 + cmd2->body.image.face = i / cache->num_mip_levels; 1978 + cmd2->body.image.mipmap = i - 1979 + (cache->num_mip_levels * cmd2->body.image.face); 1980 + cmd2->body.box = *box; 1981 + cmd2++; 1982 + } 1983 + 1984 + } 1985 + vmw_fifo_commit(dev_priv, alloc_size); 1986 + out: 1987 + memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) * 1988 + dirty->num_subres); 1989 + 1990 + return 0; 1991 + } 1992 + 1993 + /* 1994 + * vmw_surface_dirty_alloc - The surface's dirty_alloc callback. 1995 + */ 1996 + static int vmw_surface_dirty_alloc(struct vmw_resource *res) 1997 + { 1998 + struct vmw_surface *srf = vmw_res_to_srf(res); 1999 + struct vmw_surface_dirty *dirty; 2000 + u32 num_layers = 1; 2001 + u32 num_mip; 2002 + u32 num_subres; 2003 + u32 num_samples; 2004 + size_t dirty_size, acc_size; 2005 + static struct ttm_operation_ctx ctx = { 2006 + .interruptible = false, 2007 + .no_wait_gpu = false 2008 + }; 2009 + int ret; 2010 + 2011 + if (srf->array_size) 2012 + num_layers = srf->array_size; 2013 + else if (srf->flags & SVGA3D_SURFACE_CUBEMAP) 2014 + num_layers *= SVGA3D_MAX_SURFACE_FACES; 2015 + 2016 + num_mip = srf->mip_levels[0]; 2017 + if (!num_mip) 2018 + num_mip = 1; 2019 + 2020 + num_subres = num_layers * num_mip; 2021 + dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]); 2022 + acc_size = ttm_round_pot(dirty_size); 2023 + ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv), 2024 + acc_size, &ctx); 2025 + if (ret) { 2026 + VMW_DEBUG_USER("Out of graphics memory for surface " 2027 + "dirty tracker.\n"); 2028 + return ret; 2029 + } 2030 + 2031 + 
dirty = kvzalloc(dirty_size, GFP_KERNEL); 2032 + if (!dirty) { 2033 + ret = -ENOMEM; 2034 + goto out_no_dirty; 2035 + } 2036 + 2037 + num_samples = max_t(u32, 1, srf->multisample_count); 2038 + ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip, 2039 + num_layers, num_samples, &dirty->cache); 2040 + if (ret) 2041 + goto out_no_cache; 2042 + 2043 + dirty->num_subres = num_subres; 2044 + dirty->size = acc_size; 2045 + res->dirty = (struct vmw_resource_dirty *) dirty; 2046 + 2047 + return 0; 2048 + 2049 + out_no_cache: 2050 + kvfree(dirty); 2051 + out_no_dirty: 2052 + ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); 2053 + return ret; 2054 + } 2055 + 2056 + /* 2057 + * vmw_surface_dirty_free - The surface's dirty_free callback 2058 + */ 2059 + static void vmw_surface_dirty_free(struct vmw_resource *res) 2060 + { 2061 + struct vmw_surface_dirty *dirty = 2062 + (struct vmw_surface_dirty *) res->dirty; 2063 + size_t acc_size = dirty->size; 2064 + 2065 + kvfree(dirty); 2066 + ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); 2067 + res->dirty = NULL; 2068 + } 2069 + 2070 + /* 2071 + * vmw_surface_clean - The surface's clean callback 2072 + */ 2073 + static int vmw_surface_clean(struct vmw_resource *res) 2074 + { 2075 + struct vmw_private *dev_priv = res->dev_priv; 2076 + size_t alloc_size; 2077 + struct { 2078 + SVGA3dCmdHeader header; 2079 + SVGA3dCmdReadbackGBSurface body; 2080 + } *cmd; 2081 + 2082 + alloc_size = sizeof(*cmd); 2083 + cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size); 2084 + if (!cmd) 2085 + return -ENOMEM; 2086 + 2087 + cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE; 2088 + cmd->header.size = sizeof(cmd->body); 2089 + cmd->body.sid = res->id; 2090 + vmw_fifo_commit(dev_priv, alloc_size); 2091 + 2092 + return 0; 1812 2093 }
+14 -1
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
··· 29 29 30 30 int vmw_mmap(struct file *filp, struct vm_area_struct *vma) 31 31 { 32 + static const struct vm_operations_struct vmw_vm_ops = { 33 + .pfn_mkwrite = vmw_bo_vm_mkwrite, 34 + .page_mkwrite = vmw_bo_vm_mkwrite, 35 + .fault = vmw_bo_vm_fault, 36 + .open = ttm_bo_vm_open, 37 + .close = ttm_bo_vm_close 38 + }; 32 39 struct drm_file *file_priv = filp->private_data; 33 40 struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); 41 + int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev); 34 42 35 - return ttm_bo_mmap(filp, vma, &dev_priv->bdev); 43 + if (ret) 44 + return ret; 45 + 46 + vma->vm_ops = &vmw_vm_ops; 47 + 48 + return 0; 36 49 } 37 50 38 51 /* struct vmw_validation_mem callback */
+73 -1
drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
··· 33 33 * struct vmw_validation_bo_node - Buffer object validation metadata. 34 34 * @base: Metadata used for TTM reservation- and validation. 35 35 * @hash: A hash entry used for the duplicate detection hash table. 36 + * @coherent_count: If switching backup buffers, number of new coherent 37 + * resources that will have this buffer as a backup buffer. 36 38 * @as_mob: Validate as mob. 37 39 * @cpu_blit: Validate for cpu blit access. 38 40 * ··· 44 42 struct vmw_validation_bo_node { 45 43 struct ttm_validate_buffer base; 46 44 struct drm_hash_item hash; 45 + unsigned int coherent_count; 47 46 u32 as_mob : 1; 48 47 u32 cpu_blit : 1; 49 48 }; ··· 462 459 if (ret) 463 460 goto out_unreserve; 464 461 } 462 + 463 + if (val->switching_backup && val->new_backup && 464 + res->coherent) { 465 + struct vmw_validation_bo_node *bo_node = 466 + vmw_validation_find_bo_dup(ctx, 467 + val->new_backup); 468 + 469 + if (WARN_ON(!bo_node)) { 470 + ret = -EINVAL; 471 + goto out_unreserve; 472 + } 473 + bo_node->coherent_count++; 474 + } 465 475 } 466 476 467 477 return 0; ··· 581 565 int ret; 582 566 583 567 list_for_each_entry(entry, &ctx->bo_list, base.head) { 568 + struct vmw_buffer_object *vbo = 569 + container_of(entry->base.bo, typeof(*vbo), base); 570 + 584 571 if (entry->cpu_blit) { 585 572 struct ttm_operation_ctx ctx = { 586 573 .interruptible = intr, ··· 598 579 } 599 580 if (ret) 600 581 return ret; 582 + 583 + /* 584 + * Rather than having the resource code allocating the bo 585 + * dirty tracker in resource_unreserve() where we can't fail, 586 + * Do it here when validating the buffer object. 
587 + */ 588 + if (entry->coherent_count) { 589 + unsigned int coherent_count = entry->coherent_count; 590 + 591 + while (coherent_count) { 592 + ret = vmw_bo_dirty_add(vbo); 593 + if (ret) 594 + return ret; 595 + 596 + coherent_count--; 597 + } 598 + entry->coherent_count -= coherent_count; 599 + } 600 + 601 + if (vbo->dirty) 602 + vmw_bo_dirty_scan(vbo); 601 603 } 602 604 return 0; 603 605 } ··· 644 604 struct vmw_resource *res = val->res; 645 605 struct vmw_buffer_object *backup = res->backup; 646 606 647 - ret = vmw_resource_validate(res, intr); 607 + ret = vmw_resource_validate(res, intr, val->dirty_set && 608 + val->dirty); 648 609 if (ret) { 649 610 if (ret != -ERESTARTSYS) 650 611 DRM_ERROR("Failed to validate resource.\n"); ··· 871 830 872 831 ctx->mem_size_left += size; 873 832 return 0; 833 + } 834 + 835 + /** 836 + * vmw_validation_bo_backoff - Unreserve buffer objects registered with a 837 + * validation context 838 + * @ctx: The validation context 839 + * 840 + * This function unreserves the buffer objects previously reserved using 841 + * vmw_validation_bo_reserve. It's typically used as part of an error path 842 + */ 843 + void vmw_validation_bo_backoff(struct vmw_validation_context *ctx) 844 + { 845 + struct vmw_validation_bo_node *entry; 846 + 847 + /* 848 + * Switching coherent resource backup buffers failed. 849 + * Release corresponding buffer object dirty trackers. 850 + */ 851 + list_for_each_entry(entry, &ctx->bo_list, base.head) { 852 + if (entry->coherent_count) { 853 + unsigned int coherent_count = entry->coherent_count; 854 + struct vmw_buffer_object *vbo = 855 + container_of(entry->base.bo, typeof(*vbo), 856 + base); 857 + 858 + while (coherent_count--) 859 + vmw_bo_dirty_release(vbo); 860 + } 861 + } 862 + 863 + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list); 874 864 }
+2 -14
drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
··· 174 174 } 175 175 176 176 /** 177 - * vmw_validation_bo_backoff - Unreserve buffer objects registered with a 178 - * validation context 179 - * @ctx: The validation context 180 - * 181 - * This function unreserves the buffer objects previously reserved using 182 - * vmw_validation_bo_reserve. It's typically used as part of an error path 183 - */ 184 - static inline void 185 - vmw_validation_bo_backoff(struct vmw_validation_context *ctx) 186 - { 187 - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list); 188 - } 189 - 190 - /** 191 177 * vmw_validation_bo_fence - Unreserve and fence buffer objects registered 192 178 * with a validation context 193 179 * @ctx: The validation context ··· 255 269 unsigned int size); 256 270 void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx, 257 271 void *val_private, u32 dirty); 272 + void vmw_validation_bo_backoff(struct vmw_validation_context *ctx); 273 + 258 274 #endif
+14
include/drm/ttm/ttm_bo_api.h
··· 727 727 { 728 728 return bo->base.dev != NULL; 729 729 } 730 + 731 + /* Default number of pre-faulted pages in the TTM fault handler */ 732 + #define TTM_BO_VM_NUM_PREFAULT 16 733 + 734 + vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 735 + struct vm_fault *vmf); 736 + 737 + vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, 738 + pgprot_t prot, 739 + pgoff_t num_prefault); 740 + 741 + void ttm_bo_vm_open(struct vm_area_struct *vma); 742 + 743 + void ttm_bo_vm_close(struct vm_area_struct *vma); 730 744 #endif
-2
include/linux/huge_mm.h
··· 216 216 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, 217 217 struct vm_area_struct *vma) 218 218 { 219 - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); 220 219 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) 221 220 return __pmd_trans_huge_lock(pmd, vma); 222 221 else ··· 224 225 static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, 225 226 struct vm_area_struct *vma) 226 227 { 227 - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); 228 228 if (pud_trans_huge(*pud) || pud_devmap(*pud)) 229 229 return __pud_trans_huge_lock(pud, vma); 230 230 else
+12 -1
include/linux/mm.h
··· 2632 2632 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, 2633 2633 unsigned long size, pte_fn_t fn, void *data); 2634 2634 2635 - 2636 2635 #ifdef CONFIG_PAGE_POISONING 2637 2636 extern bool page_poisoning_enabled(void); 2638 2637 extern void kernel_poison_pages(struct page *page, int numpages, int enable); ··· 2871 2872 { 2872 2873 return !memcmp_pages(page1, page2); 2873 2874 } 2875 + 2876 + #ifdef CONFIG_MAPPING_DIRTY_HELPERS 2877 + unsigned long clean_record_shared_mapping_range(struct address_space *mapping, 2878 + pgoff_t first_index, pgoff_t nr, 2879 + pgoff_t bitmap_pgoff, 2880 + unsigned long *bitmap, 2881 + pgoff_t *start, 2882 + pgoff_t *end); 2883 + 2884 + unsigned long wp_shared_mapping_range(struct address_space *mapping, 2885 + pgoff_t first_index, pgoff_t nr); 2886 + #endif 2874 2887 2875 2888 #endif /* __KERNEL__ */ 2876 2889 #endif /* _LINUX_MM_H */
+9
include/linux/pagewalk.h
··· 24 24 * "do page table walk over the current vma", returning 25 25 * a negative value means "abort current page table walk 26 26 * right now" and returning 1 means "skip the current vma" 27 + * @pre_vma: if set, called before starting walk on a non-null vma. 28 + * @post_vma: if set, called after a walk on a non-null vma, provided 29 + * that @pre_vma and the vma walk succeeded. 27 30 */ 28 31 struct mm_walk_ops { 29 32 int (*pud_entry)(pud_t *pud, unsigned long addr, ··· 42 39 struct mm_walk *walk); 43 40 int (*test_walk)(unsigned long addr, unsigned long next, 44 41 struct mm_walk *walk); 42 + int (*pre_vma)(unsigned long start, unsigned long end, 43 + struct mm_walk *walk); 44 + void (*post_vma)(struct mm_walk *walk); 45 45 }; 46 46 47 47 /** ··· 68 62 void *private); 69 63 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, 70 64 void *private); 65 + int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, 66 + pgoff_t nr, const struct mm_walk_ops *ops, 67 + void *private); 71 68 72 69 #endif /* _LINUX_PAGEWALK_H */
+3 -1
include/uapi/drm/vmwgfx_drm.h
··· 891 891 * surface. 892 892 * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is 893 893 * given. 894 + * @drm_vmw_surface_flag_coherent: Back surface with coherent memory. 894 895 */ 895 896 enum drm_vmw_surface_flags { 896 897 drm_vmw_surface_flag_shareable = (1 << 0), 897 898 drm_vmw_surface_flag_scanout = (1 << 1), 898 - drm_vmw_surface_flag_create_buffer = (1 << 2) 899 + drm_vmw_surface_flag_create_buffer = (1 << 2), 900 + drm_vmw_surface_flag_coherent = (1 << 3), 899 901 }; 900 902 901 903 /**
+3
mm/Kconfig
··· 736 736 config ARCH_HAS_HUGEPD 737 737 bool 738 738 739 + config MAPPING_DIRTY_HELPERS 740 + bool 741 + 739 742 endmenu
+1
mm/Makefile
··· 107 107 obj-$(CONFIG_ZONE_DEVICE) += memremap.o 108 108 obj-$(CONFIG_HMM_MIRROR) += hmm.o 109 109 obj-$(CONFIG_MEMFD_CREATE) += memfd.o 110 + obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
+315
mm/mapping_dirty_helpers.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/pagewalk.h> 3 + #include <linux/hugetlb.h> 4 + #include <linux/bitops.h> 5 + #include <linux/mmu_notifier.h> 6 + #include <asm/cacheflush.h> 7 + #include <asm/tlbflush.h> 8 + 9 + /** 10 + * struct wp_walk - Private struct for pagetable walk callbacks 11 + * @range: Range for mmu notifiers 12 + * @tlbflush_start: Address of first modified pte 13 + * @tlbflush_end: Address of last modified pte + 1 14 + * @total: Total number of modified ptes 15 + */ 16 + struct wp_walk { 17 + struct mmu_notifier_range range; 18 + unsigned long tlbflush_start; 19 + unsigned long tlbflush_end; 20 + unsigned long total; 21 + }; 22 + 23 + /** 24 + * wp_pte - Write-protect a pte 25 + * @pte: Pointer to the pte 26 + * @addr: The virtual page address 27 + * @walk: pagetable walk callback argument 28 + * 29 + * The function write-protects a pte and records the range in 30 + * virtual address space of touched ptes for efficient range TLB flushes. 31 + */ 32 + static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end, 33 + struct mm_walk *walk) 34 + { 35 + struct wp_walk *wpwalk = walk->private; 36 + pte_t ptent = *pte; 37 + 38 + if (pte_write(ptent)) { 39 + pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte); 40 + 41 + ptent = pte_wrprotect(old_pte); 42 + ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent); 43 + wpwalk->total++; 44 + wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr); 45 + wpwalk->tlbflush_end = max(wpwalk->tlbflush_end, 46 + addr + PAGE_SIZE); 47 + } 48 + 49 + return 0; 50 + } 51 + 52 + /** 53 + * struct clean_walk - Private struct for the clean_record_pte function. 54 + * @base: struct wp_walk we derive from 55 + * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap 56 + * @bitmap: Bitmap with one bit for each page offset in the address_space range 57 + * covered. 
58 + * @start: Address_space page offset of first modified pte relative 59 + * to @bitmap_pgoff 60 + * @end: Address_space page offset of last modified pte relative 61 + * to @bitmap_pgoff 62 + */ 63 + struct clean_walk { 64 + struct wp_walk base; 65 + pgoff_t bitmap_pgoff; 66 + unsigned long *bitmap; 67 + pgoff_t start; 68 + pgoff_t end; 69 + }; 70 + 71 + #define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base) 72 + 73 + /** 74 + * clean_record_pte - Clean a pte and record its address space offset in a 75 + * bitmap 76 + * @pte: Pointer to the pte 77 + * @addr: The virtual page address 78 + * @walk: pagetable walk callback argument 79 + * 80 + * The function cleans a pte and records the range in 81 + * virtual address space of touched ptes for efficient TLB flushes. 82 + * It also records dirty ptes in a bitmap representing page offsets 83 + * in the address_space, as well as the first and last of the bits 84 + * touched. 85 + */ 86 + static int clean_record_pte(pte_t *pte, unsigned long addr, 87 + unsigned long end, struct mm_walk *walk) 88 + { 89 + struct wp_walk *wpwalk = walk->private; 90 + struct clean_walk *cwalk = to_clean_walk(wpwalk); 91 + pte_t ptent = *pte; 92 + 93 + if (pte_dirty(ptent)) { 94 + pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) + 95 + walk->vma->vm_pgoff - cwalk->bitmap_pgoff; 96 + pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte); 97 + 98 + ptent = pte_mkclean(old_pte); 99 + ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent); 100 + 101 + wpwalk->total++; 102 + wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr); 103 + wpwalk->tlbflush_end = max(wpwalk->tlbflush_end, 104 + addr + PAGE_SIZE); 105 + 106 + __set_bit(pgoff, cwalk->bitmap); 107 + cwalk->start = min(cwalk->start, pgoff); 108 + cwalk->end = max(cwalk->end, pgoff + 1); 109 + } 110 + 111 + return 0; 112 + } 113 + 114 + /* wp_clean_pmd_entry - The pagewalk pmd callback. 
*/ 115 + static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end, 116 + struct mm_walk *walk) 117 + { 118 + /* Dirty-tracking should be handled on the pte level */ 119 + pmd_t pmdval = pmd_read_atomic(pmd); 120 + 121 + if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval)) 122 + WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval)); 123 + 124 + return 0; 125 + } 126 + 127 + /* wp_clean_pud_entry - The pagewalk pud callback. */ 128 + static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end, 129 + struct mm_walk *walk) 130 + { 131 + /* Dirty-tracking should be handled on the pte level */ 132 + pud_t pudval = READ_ONCE(*pud); 133 + 134 + if (pud_trans_huge(pudval) || pud_devmap(pudval)) 135 + WARN_ON(pud_write(pudval) || pud_dirty(pudval)); 136 + 137 + return 0; 138 + } 139 + 140 + /* 141 + * wp_clean_pre_vma - The pagewalk pre_vma callback. 142 + * 143 + * The pre_vma callback performs the cache flush, stages the tlb flush 144 + * and calls the necessary mmu notifiers. 145 + */ 146 + static int wp_clean_pre_vma(unsigned long start, unsigned long end, 147 + struct mm_walk *walk) 148 + { 149 + struct wp_walk *wpwalk = walk->private; 150 + 151 + wpwalk->tlbflush_start = end; 152 + wpwalk->tlbflush_end = start; 153 + 154 + mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0, 155 + walk->vma, walk->mm, start, end); 156 + mmu_notifier_invalidate_range_start(&wpwalk->range); 157 + flush_cache_range(walk->vma, start, end); 158 + 159 + /* 160 + * We're not using tlb_gather_mmu() since typically 161 + * only a small subrange of PTEs are affected, whereas 162 + * tlb_gather_mmu() records the full range. 163 + */ 164 + inc_tlb_flush_pending(walk->mm); 165 + 166 + return 0; 167 + } 168 + 169 + /* 170 + * wp_clean_post_vma - The pagewalk post_vma callback. 171 + * 172 + * The post_vma callback performs the tlb flush and calls necessary mmu 173 + * notifiers. 
174 + */ 175 + static void wp_clean_post_vma(struct mm_walk *walk) 176 + { 177 + struct wp_walk *wpwalk = walk->private; 178 + 179 + if (mm_tlb_flush_nested(walk->mm)) 180 + flush_tlb_range(walk->vma, wpwalk->range.start, 181 + wpwalk->range.end); 182 + else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start) 183 + flush_tlb_range(walk->vma, wpwalk->tlbflush_start, 184 + wpwalk->tlbflush_end); 185 + 186 + mmu_notifier_invalidate_range_end(&wpwalk->range); 187 + dec_tlb_flush_pending(walk->mm); 188 + } 189 + 190 + /* 191 + * wp_clean_test_walk - The pagewalk test_walk callback. 192 + * 193 + * Won't perform dirty-tracking on COW, read-only or HUGETLB vmas. 194 + */ 195 + static int wp_clean_test_walk(unsigned long start, unsigned long end, 196 + struct mm_walk *walk) 197 + { 198 + unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags); 199 + 200 + /* Skip non-applicable VMAs */ 201 + if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) != 202 + (VM_SHARED | VM_MAYWRITE)) 203 + return 1; 204 + 205 + return 0; 206 + } 207 + 208 + static const struct mm_walk_ops clean_walk_ops = { 209 + .pte_entry = clean_record_pte, 210 + .pmd_entry = wp_clean_pmd_entry, 211 + .pud_entry = wp_clean_pud_entry, 212 + .test_walk = wp_clean_test_walk, 213 + .pre_vma = wp_clean_pre_vma, 214 + .post_vma = wp_clean_post_vma 215 + }; 216 + 217 + static const struct mm_walk_ops wp_walk_ops = { 218 + .pte_entry = wp_pte, 219 + .pmd_entry = wp_clean_pmd_entry, 220 + .pud_entry = wp_clean_pud_entry, 221 + .test_walk = wp_clean_test_walk, 222 + .pre_vma = wp_clean_pre_vma, 223 + .post_vma = wp_clean_post_vma 224 + }; 225 + 226 + /** 227 + * wp_shared_mapping_range - Write-protect all ptes in an address space range 228 + * @mapping: The address_space we want to write protect 229 + * @first_index: The first page offset in the range 230 + * @nr: Number of incremental page offsets to cover 231 + * 232 + * Note: This function currently skips transhuge page-table entries, since 233 + * it's intended 
for dirty-tracking on the PTE level. It will warn on 234 + * encountering transhuge write-enabled entries, though, and can easily be 235 + * extended to handle them as well. 236 + * 237 + * Return: The number of ptes actually write-protected. Note that 238 + * already write-protected ptes are not counted. 239 + */ 240 + unsigned long wp_shared_mapping_range(struct address_space *mapping, 241 + pgoff_t first_index, pgoff_t nr) 242 + { 243 + struct wp_walk wpwalk = { .total = 0 }; 244 + 245 + i_mmap_lock_read(mapping); 246 + WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops, 247 + &wpwalk)); 248 + i_mmap_unlock_read(mapping); 249 + 250 + return wpwalk.total; 251 + } 252 + EXPORT_SYMBOL_GPL(wp_shared_mapping_range); 253 + 254 + /** 255 + * clean_record_shared_mapping_range - Clean and record all ptes in an 256 + * address space range 257 + * @mapping: The address_space we want to clean 258 + * @first_index: The first page offset in the range 259 + * @nr: Number of incremental page offsets to cover 260 + * @bitmap_pgoff: The page offset of the first bit in @bitmap 261 + * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to 262 + * cover the whole range @first_index..@first_index + @nr. 263 + * @start: Pointer to number of the first set bit in @bitmap. 264 + * is modified as new bits are set by the function. 265 + * @end: Pointer to the number of the last set bit in @bitmap. 266 + * none set. The value is modified as new bits are set by the function. 267 + * 268 + * Note: When this function returns there is no guarantee that a CPU has 269 + * not already dirtied new ptes. However it will not clean any ptes not 270 + * reported in the bitmap. The guarantees are as follows: 271 + * a) All ptes dirty when the function starts executing will end up recorded 272 + * in the bitmap. 273 + * b) All ptes dirtied after that will either remain dirty, be recorded in the 274 + * bitmap or both. 
275 + * 276 + * If a caller needs to make sure all dirty ptes are picked up and none 277 + * additional are added, it first needs to write-protect the address-space 278 + * range and make sure new writers are blocked in page_mkwrite() or 279 + * pfn_mkwrite(). And then after a TLB flush following the write-protection 280 + * pick up all dirty bits. 281 + * 282 + * Note: This function currently skips transhuge page-table entries, since 283 + * it's intended for dirty-tracking on the PTE level. It will warn on 284 + * encountering transhuge dirty entries, though, and can easily be extended 285 + * to handle them as well. 286 + * 287 + * Return: The number of dirty ptes actually cleaned. 288 + */ 289 + unsigned long clean_record_shared_mapping_range(struct address_space *mapping, 290 + pgoff_t first_index, pgoff_t nr, 291 + pgoff_t bitmap_pgoff, 292 + unsigned long *bitmap, 293 + pgoff_t *start, 294 + pgoff_t *end) 295 + { 296 + bool none_set = (*start >= *end); 297 + struct clean_walk cwalk = { 298 + .base = { .total = 0 }, 299 + .bitmap_pgoff = bitmap_pgoff, 300 + .bitmap = bitmap, 301 + .start = none_set ? nr : *start, 302 + .end = none_set ? 0 : *end, 303 + }; 304 + 305 + i_mmap_lock_read(mapping); 306 + WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops, 307 + &cwalk.base)); 308 + i_mmap_unlock_read(mapping); 309 + 310 + *start = cwalk.start; 311 + *end = cwalk.end; 312 + 313 + return cwalk.base.total; 314 + } 315 + EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);
+96 -3
mm/pagewalk.c
··· 10 10 pte_t *pte; 11 11 int err = 0; 12 12 const struct mm_walk_ops *ops = walk->ops; 13 + spinlock_t *ptl; 13 14 14 - pte = pte_offset_map(pmd, addr); 15 + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 15 16 for (;;) { 16 17 err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); 17 18 if (err) ··· 23 22 pte++; 24 23 } 25 24 26 - pte_unmap(pte); 25 + pte_unmap_unlock(pte, ptl); 27 26 return err; 28 27 } 29 28 ··· 254 253 { 255 254 int err = 0; 256 255 struct vm_area_struct *vma = walk->vma; 256 + const struct mm_walk_ops *ops = walk->ops; 257 + 258 + if (vma && ops->pre_vma) { 259 + err = ops->pre_vma(start, end, walk); 260 + if (err) 261 + return err; 262 + } 257 263 258 264 if (vma && is_vm_hugetlb_page(vma)) { 259 - if (walk->ops->hugetlb_entry) 265 + if (ops->hugetlb_entry) 260 266 err = walk_hugetlb_range(start, end, walk); 261 267 } else 262 268 err = walk_pgd_range(start, end, walk); 269 + 270 + if (vma && ops->post_vma) 271 + ops->post_vma(walk); 263 272 264 273 return err; 265 274 } ··· 300 289 * they really want to walk over the current vma, typically by checking 301 290 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this 302 291 * purpose. 292 + * 293 + * If operations need to be staged before and committed after a vma is walked, 294 + * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(), 295 + * since it is intended to handle commit-type operations, can't return any 296 + * errors. 303 297 * 304 298 * struct mm_walk keeps current values of some common data like vma and pmd, 305 299 * which are useful for the access from callbacks. If you want to pass some ··· 391 375 if (err < 0) 392 376 return err; 393 377 return __walk_page_range(vma->vm_start, vma->vm_end, &walk); 378 + } 379 + 380 + /** 381 + * walk_page_mapping - walk all memory areas mapped into a struct address_space. 
382 + * @mapping: Pointer to the struct address_space 383 + * @first_index: First page offset in the address_space 384 + * @nr: Number of incremental page offsets to cover 385 + * @ops: operation to call during the walk 386 + * @private: private data for callbacks' usage 387 + * 388 + * This function walks all memory areas mapped into a struct address_space. 389 + * The walk is limited to only the given page-size index range, but if 390 + * the index boundaries cross a huge page-table entry, that entry will be 391 + * included. 392 + * 393 + * Also see walk_page_range() for additional information. 394 + * 395 + * Locking: 396 + * This function can't require that the struct mm_struct::mmap_sem is held, 397 + * since @mapping may be mapped by multiple processes. Instead 398 + * @mapping->i_mmap_rwsem must be held. This might have implications in the 399 + * callbacks, and it's up to the caller to ensure that the 400 + * struct mm_struct::mmap_sem is not needed. 401 + * 402 + * Also this means that a caller can't rely on the struct 403 + * vm_area_struct::vm_flags to be constant across a call, 404 + * except for immutable flags. Callers requiring this shouldn't use 405 + * this function. 406 + * 407 + * Return: 0 on success, negative error code on failure, positive number on 408 + * caller defined premature termination. 
409 + */ 410 + int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, 411 + pgoff_t nr, const struct mm_walk_ops *ops, 412 + void *private) 413 + { 414 + struct mm_walk walk = { 415 + .ops = ops, 416 + .private = private, 417 + }; 418 + struct vm_area_struct *vma; 419 + pgoff_t vba, vea, cba, cea; 420 + unsigned long start_addr, end_addr; 421 + int err = 0; 422 + 423 + lockdep_assert_held(&mapping->i_mmap_rwsem); 424 + vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index, 425 + first_index + nr - 1) { 426 + /* Clip to the vma */ 427 + vba = vma->vm_pgoff; 428 + vea = vba + vma_pages(vma); 429 + cba = first_index; 430 + cba = max(cba, vba); 431 + cea = first_index + nr; 432 + cea = min(cea, vea); 433 + 434 + start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start; 435 + end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start; 436 + if (start_addr >= end_addr) 437 + continue; 438 + 439 + walk.vma = vma; 440 + walk.mm = vma->vm_mm; 441 + 442 + err = walk_page_test(vma->vm_start, vma->vm_end, &walk); 443 + if (err > 0) { 444 + err = 0; 445 + break; 446 + } else if (err < 0) 447 + break; 448 + 449 + err = __walk_page_range(start_addr, end_addr, &walk); 450 + if (err) 451 + break; 452 + } 453 + 454 + return err; 394 455 }