Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/gntdev: Avoid blocking in unmap_grant_pages()

unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.

Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns.  Fortunately, the only use of the return value is a
WARN_ON(), which can instead be issued at the point where the error is
detected.  Additionally, a failed call will not prevent further calls
from being made, but this is harmless.

Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.

It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.

Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>

authored by

Demi Marie Obenour and committed by
Juergen Gross
dbe97cff ca696901

+113 -51
+7
drivers/xen/gntdev-common.h
··· 16 16 #include <linux/mmu_notifier.h> 17 17 #include <linux/types.h> 18 18 #include <xen/interface/event_channel.h> 19 + #include <xen/grant_table.h> 19 20 20 21 struct gntdev_dmabuf_priv; 21 22 ··· 57 56 struct gnttab_unmap_grant_ref *unmap_ops; 58 57 struct gnttab_map_grant_ref *kmap_ops; 59 58 struct gnttab_unmap_grant_ref *kunmap_ops; 59 + bool *being_removed; 60 60 struct page **pages; 61 61 unsigned long pages_vm_start; 62 62 ··· 75 73 /* Needed to avoid allocation in gnttab_dma_free_pages(). */ 76 74 xen_pfn_t *frames; 77 75 #endif 76 + 77 + /* Number of live grants */ 78 + atomic_t live_grants; 79 + /* Needed to avoid allocation in __unmap_grant_pages */ 80 + struct gntab_unmap_queue_data unmap_data; 78 81 }; 79 82 80 83 struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
+106 -51
drivers/xen/gntdev.c
··· 35 35 #include <linux/slab.h> 36 36 #include <linux/highmem.h> 37 37 #include <linux/refcount.h> 38 + #include <linux/workqueue.h> 38 39 39 40 #include <xen/xen.h> 40 41 #include <xen/grant_table.h> ··· 61 60 MODULE_PARM_DESC(limit, 62 61 "Maximum number of grants that may be mapped by one mapping request"); 63 62 63 + /* True in PV mode, false otherwise */ 64 64 static int use_ptemod; 65 65 66 - static int unmap_grant_pages(struct gntdev_grant_map *map, 67 - int offset, int pages); 66 + static void unmap_grant_pages(struct gntdev_grant_map *map, 67 + int offset, int pages); 68 68 69 69 static struct miscdevice gntdev_miscdev; 70 70 ··· 122 120 kvfree(map->unmap_ops); 123 121 kvfree(map->kmap_ops); 124 122 kvfree(map->kunmap_ops); 123 + kvfree(map->being_removed); 125 124 kfree(map); 126 125 } 127 126 ··· 143 140 add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), 144 141 GFP_KERNEL); 145 142 add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); 143 + add->being_removed = 144 + kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); 146 145 if (NULL == add->grants || 147 146 NULL == add->map_ops || 148 147 NULL == add->unmap_ops || 149 - NULL == add->pages) 148 + NULL == add->pages || 149 + NULL == add->being_removed) 150 150 goto err; 151 151 if (use_ptemod) { 152 152 add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), ··· 256 250 if (!refcount_dec_and_test(&map->users)) 257 251 return; 258 252 259 - if (map->pages && !use_ptemod) 253 + if (map->pages && !use_ptemod) { 254 + /* 255 + * Increment the reference count. This ensures that the 256 + * subsequent call to unmap_grant_pages() will not wind up 257 + * re-entering itself. It *can* wind up calling 258 + * gntdev_put_map() recursively, but such calls will be with a 259 + * reference count greater than 1, so they will return before 260 + * this code is reached. The recursion depth is thus limited to 261 + * 1. 
Do NOT use refcount_inc() here, as it will detect that 262 + * the reference count is zero and WARN(). 263 + */ 264 + refcount_set(&map->users, 1); 265 + 266 + /* 267 + * Unmap the grants. This may or may not be asynchronous, so it 268 + * is possible that the reference count is 1 on return, but it 269 + * could also be greater than 1. 270 + */ 260 271 unmap_grant_pages(map, 0, map->count); 272 + 273 + /* Check if the memory now needs to be freed */ 274 + if (!refcount_dec_and_test(&map->users)) 275 + return; 276 + 277 + /* 278 + * All pages have been returned to the hypervisor, so free the 279 + * map. 280 + */ 281 + } 261 282 262 283 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 263 284 notify_remote_via_evtchn(map->notify.event); ··· 316 283 317 284 int gntdev_map_grant_pages(struct gntdev_grant_map *map) 318 285 { 286 + size_t alloced = 0; 319 287 int i, err = 0; 320 288 321 289 if (!use_ptemod) { ··· 365 331 map->count); 366 332 367 333 for (i = 0; i < map->count; i++) { 368 - if (map->map_ops[i].status == GNTST_okay) 334 + if (map->map_ops[i].status == GNTST_okay) { 369 335 map->unmap_ops[i].handle = map->map_ops[i].handle; 370 - else if (!err) 336 + if (!use_ptemod) 337 + alloced++; 338 + } else if (!err) 371 339 err = -EINVAL; 372 340 373 341 if (map->flags & GNTMAP_device_map) 374 342 map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; 375 343 376 344 if (use_ptemod) { 377 - if (map->kmap_ops[i].status == GNTST_okay) 345 + if (map->kmap_ops[i].status == GNTST_okay) { 346 + if (map->map_ops[i].status == GNTST_okay) 347 + alloced++; 378 348 map->kunmap_ops[i].handle = map->kmap_ops[i].handle; 379 - else if (!err) 349 + } else if (!err) 380 350 err = -EINVAL; 381 351 } 382 352 } 353 + atomic_add(alloced, &map->live_grants); 383 354 return err; 384 355 } 385 356 386 - static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, 387 - int pages) 357 + static void __unmap_grant_pages_done(int result, 358 + struct 
gntab_unmap_queue_data *data) 388 359 { 389 - int i, err = 0; 390 - struct gntab_unmap_queue_data unmap_data; 360 + unsigned int i; 361 + struct gntdev_grant_map *map = data->data; 362 + unsigned int offset = data->unmap_ops - map->unmap_ops; 391 363 392 - if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 393 - int pgno = (map->notify.addr >> PAGE_SHIFT); 394 - if (pgno >= offset && pgno < offset + pages) { 395 - /* No need for kmap, pages are in lowmem */ 396 - uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); 397 - tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; 398 - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 399 - } 400 - } 401 - 402 - unmap_data.unmap_ops = map->unmap_ops + offset; 403 - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; 404 - unmap_data.pages = map->pages + offset; 405 - unmap_data.count = pages; 406 - 407 - err = gnttab_unmap_refs_sync(&unmap_data); 408 - if (err) 409 - return err; 410 - 411 - for (i = 0; i < pages; i++) { 412 - if (map->unmap_ops[offset+i].status) 413 - err = -EINVAL; 364 + for (i = 0; i < data->count; i++) { 365 + WARN_ON(map->unmap_ops[offset+i].status); 414 366 pr_debug("unmap handle=%d st=%d\n", 415 367 map->unmap_ops[offset+i].handle, 416 368 map->unmap_ops[offset+i].status); 417 369 map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; 418 370 if (use_ptemod) { 419 - if (map->kunmap_ops[offset+i].status) 420 - err = -EINVAL; 371 + WARN_ON(map->kunmap_ops[offset+i].status); 421 372 pr_debug("kunmap handle=%u st=%d\n", 422 373 map->kunmap_ops[offset+i].handle, 423 374 map->kunmap_ops[offset+i].status); 424 375 map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; 425 376 } 426 377 } 427 - return err; 378 + /* 379 + * Decrease the live-grant counter. This must happen after the loop to 380 + * prevent premature reuse of the grants by gnttab_mmap(). 
381 + */ 382 + atomic_sub(data->count, &map->live_grants); 383 + 384 + /* Release reference taken by __unmap_grant_pages */ 385 + gntdev_put_map(NULL, map); 428 386 } 429 387 430 - static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, 431 - int pages) 388 + static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, 389 + int pages) 432 390 { 433 - int range, err = 0; 391 + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 392 + int pgno = (map->notify.addr >> PAGE_SHIFT); 393 + 394 + if (pgno >= offset && pgno < offset + pages) { 395 + /* No need for kmap, pages are in lowmem */ 396 + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); 397 + 398 + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; 399 + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 400 + } 401 + } 402 + 403 + map->unmap_data.unmap_ops = map->unmap_ops + offset; 404 + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; 405 + map->unmap_data.pages = map->pages + offset; 406 + map->unmap_data.count = pages; 407 + map->unmap_data.done = __unmap_grant_pages_done; 408 + map->unmap_data.data = map; 409 + refcount_inc(&map->users); /* to keep map alive during async call below */ 410 + 411 + gnttab_unmap_refs_async(&map->unmap_data); 412 + } 413 + 414 + static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, 415 + int pages) 416 + { 417 + int range; 418 + 419 + if (atomic_read(&map->live_grants) == 0) 420 + return; /* Nothing to do */ 434 421 435 422 pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 436 423 437 424 /* It is possible the requested range will have a "hole" where we 438 425 * already unmapped some of the grants. Only unmap valid ranges. 
439 426 */ 440 - while (pages && !err) { 441 - while (pages && 442 - map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { 427 + while (pages) { 428 + while (pages && map->being_removed[offset]) { 443 429 offset++; 444 430 pages--; 445 431 } 446 432 range = 0; 447 433 while (range < pages) { 448 - if (map->unmap_ops[offset + range].handle == 449 - INVALID_GRANT_HANDLE) 434 + if (map->being_removed[offset + range]) 450 435 break; 436 + map->being_removed[offset + range] = true; 451 437 range++; 452 438 } 453 - err = __unmap_grant_pages(map, offset, range); 439 + if (range) 440 + __unmap_grant_pages(map, offset, range); 454 441 offset += range; 455 442 pages -= range; 456 443 } 457 - 458 - return err; 459 444 } 460 445 461 446 /* ------------------------------------------------------------------ */ ··· 526 473 struct gntdev_grant_map *map = 527 474 container_of(mn, struct gntdev_grant_map, notifier); 528 475 unsigned long mstart, mend; 529 - int err; 530 476 531 477 if (!mmu_notifier_range_blockable(range)) 532 478 return false; ··· 546 494 map->index, map->count, 547 495 map->vma->vm_start, map->vma->vm_end, 548 496 range->start, range->end, mstart, mend); 549 - err = unmap_grant_pages(map, 497 + unmap_grant_pages(map, 550 498 (mstart - map->vma->vm_start) >> PAGE_SHIFT, 551 499 (mend - mstart) >> PAGE_SHIFT); 552 - WARN_ON(err); 553 500 554 501 return true; 555 502 } ··· 1036 985 goto unlock_out; 1037 986 if (use_ptemod && map->vma) 1038 987 goto unlock_out; 988 + if (atomic_read(&map->live_grants)) { 989 + err = -EAGAIN; 990 + goto unlock_out; 991 + } 1039 992 refcount_inc(&map->users); 1040 993 1041 994 vma->vm_ops = &gntdev_vmops;