drm/ttm: remove ttm_bo_vm_insert_huge()

The huge page functionality in TTM does not work safely because PUD and
PMD entries do not have a special bit.

get_user_pages_fast() considers any page that passed pmd_huge() as
usable:

if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
             pmd_devmap(pmd))) {
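
For contrast, the PTE-level GUP_fast path does have an escape hatch for
special mappings - roughly, paraphrased from gup_pte_range() in mm/gup.c:

	} else if (pte_special(pte))
		goto pte_unmap;

so PTE-level pfn insertions that set the special bit are skipped by
GUP_fast. No equivalent bit exists at the PMD/PUD level.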

And vmf_insert_pfn_pmd_prot() unconditionally sets

entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));

e.g. on x86 the page will be _PAGE_PRESENT | _PAGE_PSE.

As such gup_huge_pmd() will try to deref a struct page:

head = try_grab_compound_head(pmd_page(orig), refs, flags);

and thus crash.

Thomas further notes that the drivers do not expect the struct page to
be used by anything - in particular, the refcount increment above will
cause them to malfunction.

Thus none of this can work correctly with GUP_fast. Delete it entirely.
It can return someday, along with a proper PMD/PUD_SPECIAL bit in the
page table itself, to gate GUP_fast.
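
The PTE-sized path that remains is safe because the ordinary pfn
insertion helpers mark the entry special, so GUP_fast skips it -
roughly, paraphrased from insert_pfn() in mm/memory.c:

	entry = pte_mkspecial(pfn_t_pte(pfn, prot));

which is what ttm_bo_vm_fault_reserved() ends up using via
vmf_insert_pfn_prot() for each prefaulted page.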

Fixes: 314b6580adc5 ("drm/ttm, drm/vmwgfx: Support huge TTM pagefaults")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
[danvet: Update subject per Thomas' & Christian's review]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/0-v2-a44694790652+4ac-ttm_pmd_jgg@nvidia.com

Authored by Jason Gunthorpe, committed by Daniel Vetter
0d979509 ff2d2384

8 files changed, +7 -175

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c (+1 -1)
···
 		}
 
 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-					       TTM_BO_VM_NUM_PREFAULT, 1);
+					       TTM_BO_VM_NUM_PREFAULT);
 
 		drm_dev_exit(idx);
 	} else {

drivers/gpu/drm/nouveau/nouveau_gem.c (+1 -1)
···
 
 	nouveau_bo_del_io_reserve_lru(bo);
 	prot = vm_get_page_prot(vma->vm_flags);
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
 	nouveau_bo_add_io_reserve_lru(bo);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;

drivers/gpu/drm/radeon/radeon_gem.c (+1 -1)
···
 		goto unlock_resv;
 
 	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-				       TTM_BO_VM_NUM_PREFAULT, 1);
+				       TTM_BO_VM_NUM_PREFAULT);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		goto unlock_mclk;
 

drivers/gpu/drm/ttm/ttm_bo_vm.c (+2 -92)
···
 }
 EXPORT_SYMBOL(ttm_bo_vm_reserve);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/**
- * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
- * @vmf: Fault data
- * @bo: The buffer object
- * @page_offset: Page offset from bo start
- * @fault_page_size: The size of the fault in pages.
- * @pgprot: The page protections.
- * Does additional checking whether it's possible to insert a PUD or PMD
- * pfn and performs the insertion.
- *
- * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
- * a huge fault was not possible, or on insertion error.
- */
-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
-					struct ttm_buffer_object *bo,
-					pgoff_t page_offset,
-					pgoff_t fault_page_size,
-					pgprot_t pgprot)
-{
-	pgoff_t i;
-	vm_fault_t ret;
-	unsigned long pfn;
-	pfn_t pfnt;
-	struct ttm_tt *ttm = bo->ttm;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-
-	/* Fault should not cross bo boundary. */
-	page_offset &= ~(fault_page_size - 1);
-	if (page_offset + fault_page_size > bo->resource->num_pages)
-		goto out_fallback;
-
-	if (bo->resource->bus.is_iomem)
-		pfn = ttm_bo_io_mem_pfn(bo, page_offset);
-	else
-		pfn = page_to_pfn(ttm->pages[page_offset]);
-
-	/* pfn must be fault_page_size aligned. */
-	if ((pfn & (fault_page_size - 1)) != 0)
-		goto out_fallback;
-
-	/* Check that memory is contiguous. */
-	if (!bo->resource->bus.is_iomem) {
-		for (i = 1; i < fault_page_size; ++i) {
-			if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
-				goto out_fallback;
-		}
-	} else if (bo->bdev->funcs->io_mem_pfn) {
-		for (i = 1; i < fault_page_size; ++i) {
-			if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
-				goto out_fallback;
-		}
-	}
-
-	pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
-	if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
-		ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-	else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
-		ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
-#endif
-	else
-		WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
-
-	if (ret != VM_FAULT_NOPAGE)
-		goto out_fallback;
-
-	return VM_FAULT_NOPAGE;
-out_fallback:
-	count_vm_event(THP_FAULT_FALLBACK);
-	return VM_FAULT_FALLBACK;
-}
-#else
-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
-					struct ttm_buffer_object *bo,
-					pgoff_t page_offset,
-					pgoff_t fault_page_size,
-					pgprot_t pgprot)
-{
-	return VM_FAULT_FALLBACK;
-}
-#endif
-
 /**
  * ttm_bo_vm_fault_reserved - TTM fault helper
  * @vmf: The struct vm_fault given as argument to the fault callback
···
  * @num_prefault: Maximum number of prefault pages. The caller may want to
  * specify this based on madvice settings and the size of the GPU object
  * backed by the memory.
- * @fault_page_size: The size of the fault in pages.
  *
  * This function inserts one or more page table entries pointing to the
  * memory backing the buffer object, and then returns a return code
···
  */
 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 				    pgprot_t prot,
-				    pgoff_t num_prefault,
-				    pgoff_t fault_page_size)
+				    pgoff_t num_prefault)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct ttm_buffer_object *bo = vma->vm_private_data;
···
 		/* Iomem should not be marked encrypted */
 		prot = pgprot_decrypted(prot);
 	}
-
-	/* We don't prefault on huge faults. Yet. */
-	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
-		return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
-					     fault_page_size, prot);
 
 	/*
 	 * Speculatively prefault a number of pages. Only error on
···
 
 	prot = vma->vm_page_prot;
 	if (drm_dev_enter(ddev, &idx)) {
-		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
 		drm_dev_exit(idx);
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, prot);

drivers/gpu/drm/vmwgfx/vmwgfx_drv.h (-4)
···
 			pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
-				enum page_entry_size pe_size);
-#endif
 
 /* Transparent hugepage support - vmwgfx_thp.c */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE

drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c (+1 -71)
···
 	else
 		prot = vm_get_page_prot(vma->vm_flags);
 
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
 
···
 
 	return ret;
 }
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
-				enum page_entry_size pe_size)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-	    vma->vm_private_data;
-	struct vmw_buffer_object *vbo =
-		container_of(bo, struct vmw_buffer_object, base);
-	pgprot_t prot;
-	vm_fault_t ret;
-	pgoff_t fault_page_size;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-
-	switch (pe_size) {
-	case PE_SIZE_PMD:
-		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
-		break;
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-	case PE_SIZE_PUD:
-		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
-		break;
-#endif
-	default:
-		WARN_ON_ONCE(1);
-		return VM_FAULT_FALLBACK;
-	}
-
-	/* Always do write dirty-tracking and COW on PTE level. */
-	if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
-		return VM_FAULT_FALLBACK;
-
-	ret = ttm_bo_vm_reserve(bo, vmf);
-	if (ret)
-		return ret;
-
-	if (vbo->dirty) {
-		pgoff_t allowed_prefault;
-		unsigned long page_offset;
-
-		page_offset = vmf->pgoff -
-			drm_vma_node_start(&bo->base.vma_node);
-		if (page_offset >= bo->resource->num_pages ||
-		    vmw_resources_clean(vbo, page_offset,
-					page_offset + PAGE_SIZE,
-					&allowed_prefault)) {
-			ret = VM_FAULT_SIGBUS;
-			goto out_unlock;
-		}
-
-		/*
-		 * Write protect, so we get a new fault on write, and can
-		 * split.
-		 */
-		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
-	} else {
-		prot = vm_get_page_prot(vma->vm_flags);
-	}
-
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
-	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-		return ret;
-
-out_unlock:
-	dma_resv_unlock(bo->base.resv);
-
-	return ret;
-}
-#endif

drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c (-3)
···
 		.fault = vmw_bo_vm_fault,
 		.open = ttm_bo_vm_open,
 		.close = ttm_bo_vm_close,
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		.huge_fault = vmw_bo_vm_huge_fault,
-#endif
 	};
 	struct drm_file *file_priv = filp->private_data;
 	struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);

include/drm/ttm/ttm_bo_api.h (+1 -2)
···
 
 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 				    pgprot_t prot,
-				    pgoff_t num_prefault,
-				    pgoff_t fault_page_size);
+				    pgoff_t num_prefault);
 
 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf);
 