Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device fence_lock

The bo lock was used only to protect the bo sync object members, and since it
is a per bo lock, fencing a buffer list will see a lot of locks and unlocks.
Replace it with a per-device lock that protects the sync object members on
*all* bos. Reading and setting these members will always be very quick, so
the risk of heavy lock contention is microscopic. Note that waiting for
sync objects will always take place outside of this lock.

The bo device fence lock will eventually be replaced with a seqlock /
rcu mechanism so we can determine that a bo is idle under an
rcu / read seqlock.

However this change will allow us to batch fencing and unreserving of
buffers with a minimal amount of locking.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jerome Glisse <j.glisse@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

authored by

Thomas Hellstrom and committed by
Dave Airlie
702adba2 96726fe5

+53 -51
+6 -6
drivers/gpu/drm/nouveau/nouveau_gem.c
··· 234 234 if (likely(fence)) { 235 235 struct nouveau_fence *prev_fence; 236 236 237 - spin_lock(&nvbo->bo.lock); 237 + spin_lock(&nvbo->bo.bdev->fence_lock); 238 238 prev_fence = nvbo->bo.sync_obj; 239 239 nvbo->bo.sync_obj = nouveau_fence_ref(fence); 240 - spin_unlock(&nvbo->bo.lock); 240 + spin_unlock(&nvbo->bo.bdev->fence_lock); 241 241 nouveau_fence_unref((void *)&prev_fence); 242 242 } 243 243 ··· 557 557 data |= r->vor; 558 558 } 559 559 560 - spin_lock(&nvbo->bo.lock); 560 + spin_lock(&nvbo->bo.bdev->fence_lock); 561 561 ret = ttm_bo_wait(&nvbo->bo, false, false, false); 562 - spin_unlock(&nvbo->bo.lock); 562 + spin_unlock(&nvbo->bo.bdev->fence_lock); 563 563 if (ret) { 564 564 NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret); 565 565 break; ··· 791 791 } 792 792 793 793 if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) { 794 - spin_lock(&nvbo->bo.lock); 794 + spin_lock(&nvbo->bo.bdev->fence_lock); 795 795 ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait); 796 - spin_unlock(&nvbo->bo.lock); 796 + spin_unlock(&nvbo->bo.bdev->fence_lock); 797 797 } else { 798 798 ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait); 799 799 if (ret == 0)
+2 -2
drivers/gpu/drm/radeon/radeon_object.c
··· 369 369 370 370 list_for_each_entry(lobj, head, list) { 371 371 bo = lobj->bo; 372 - spin_lock(&bo->tbo.lock); 372 + spin_lock(&bo->tbo.bdev->fence_lock); 373 373 old_fence = (struct radeon_fence *)bo->tbo.sync_obj; 374 374 bo->tbo.sync_obj = radeon_fence_ref(fence); 375 375 bo->tbo.sync_obj_arg = NULL; 376 - spin_unlock(&bo->tbo.lock); 376 + spin_unlock(&bo->tbo.bdev->fence_lock); 377 377 if (old_fence) { 378 378 radeon_fence_unref(&old_fence); 379 379 }
+2 -2
drivers/gpu/drm/radeon/radeon_object.h
··· 126 126 r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); 127 127 if (unlikely(r != 0)) 128 128 return r; 129 - spin_lock(&bo->tbo.lock); 129 + spin_lock(&bo->tbo.bdev->fence_lock); 130 130 if (mem_type) 131 131 *mem_type = bo->tbo.mem.mem_type; 132 132 if (bo->tbo.sync_obj) 133 133 r = ttm_bo_wait(&bo->tbo, true, true, no_wait); 134 - spin_unlock(&bo->tbo.lock); 134 + spin_unlock(&bo->tbo.bdev->fence_lock); 135 135 ttm_bo_unreserve(&bo->tbo); 136 136 return r; 137 137 }
+28 -27
drivers/gpu/drm/ttm/ttm_bo.c
··· 427 427 } 428 428 429 429 if (bo->mem.mm_node) { 430 - spin_lock(&bo->lock); 431 430 bo->offset = (bo->mem.start << PAGE_SHIFT) + 432 431 bdev->man[bo->mem.mem_type].gpu_offset; 433 432 bo->cur_placement = bo->mem.placement; 434 - spin_unlock(&bo->lock); 435 433 } else 436 434 bo->offset = 0; 437 435 ··· 483 485 int put_count; 484 486 int ret; 485 487 486 - spin_lock(&bo->lock); 488 + spin_lock(&bdev->fence_lock); 487 489 (void) ttm_bo_wait(bo, false, false, true); 488 490 if (!bo->sync_obj) { 489 491 490 492 spin_lock(&glob->lru_lock); 491 493 492 494 /** 493 - * Lock inversion between bo::reserve and bo::lock here, 495 + * Lock inversion between bo:reserve and bdev::fence_lock here, 494 496 * but that's OK, since we're only trylocking. 495 497 */ 496 498 ··· 499 501 if (unlikely(ret == -EBUSY)) 500 502 goto queue; 501 503 502 - spin_unlock(&bo->lock); 504 + spin_unlock(&bdev->fence_lock); 503 505 put_count = ttm_bo_del_from_lru(bo); 504 506 505 507 spin_unlock(&glob->lru_lock); ··· 520 522 kref_get(&bo->list_kref); 521 523 list_add_tail(&bo->ddestroy, &bdev->ddestroy); 522 524 spin_unlock(&glob->lru_lock); 523 - spin_unlock(&bo->lock); 525 + spin_unlock(&bdev->fence_lock); 524 526 525 527 if (sync_obj) { 526 528 driver->sync_obj_flush(sync_obj, sync_obj_arg); ··· 545 547 bool no_wait_reserve, 546 548 bool no_wait_gpu) 547 549 { 550 + struct ttm_bo_device *bdev = bo->bdev; 548 551 struct ttm_bo_global *glob = bo->glob; 549 552 int put_count; 550 553 int ret = 0; 551 554 552 555 retry: 553 - spin_lock(&bo->lock); 556 + spin_lock(&bdev->fence_lock); 554 557 ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); 555 - spin_unlock(&bo->lock); 558 + spin_unlock(&bdev->fence_lock); 556 559 557 560 if (unlikely(ret != 0)) 558 561 return ret; ··· 706 707 struct ttm_placement placement; 707 708 int ret = 0; 708 709 709 - spin_lock(&bo->lock); 710 + spin_lock(&bdev->fence_lock); 710 711 ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); 711 - 
spin_unlock(&bo->lock); 712 + spin_unlock(&bdev->fence_lock); 712 713 713 714 if (unlikely(ret != 0)) { 714 715 if (ret != -ERESTARTSYS) { ··· 1043 1044 { 1044 1045 int ret = 0; 1045 1046 struct ttm_mem_reg mem; 1047 + struct ttm_bo_device *bdev = bo->bdev; 1046 1048 1047 1049 BUG_ON(!atomic_read(&bo->reserved)); 1048 1050 ··· 1052 1052 * Have the driver move function wait for idle when necessary, 1053 1053 * instead of doing it here. 1054 1054 */ 1055 - spin_lock(&bo->lock); 1055 + spin_lock(&bdev->fence_lock); 1056 1056 ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); 1057 - spin_unlock(&bo->lock); 1057 + spin_unlock(&bdev->fence_lock); 1058 1058 if (ret) 1059 1059 return ret; 1060 1060 mem.num_pages = bo->num_pages; ··· 1171 1171 } 1172 1172 bo->destroy = destroy; 1173 1173 1174 - spin_lock_init(&bo->lock); 1175 1174 kref_init(&bo->kref); 1176 1175 kref_init(&bo->list_kref); 1177 1176 atomic_set(&bo->cpu_writers, 0); ··· 1534 1535 bdev->dev_mapping = NULL; 1535 1536 bdev->glob = glob; 1536 1537 bdev->need_dma32 = need_dma32; 1537 - 1538 + spin_lock_init(&bdev->fence_lock); 1538 1539 mutex_lock(&glob->device_list_mutex); 1539 1540 list_add_tail(&bdev->device_list, &glob->device_list); 1540 1541 mutex_unlock(&glob->device_list_mutex); ··· 1658 1659 bool lazy, bool interruptible, bool no_wait) 1659 1660 { 1660 1661 struct ttm_bo_driver *driver = bo->bdev->driver; 1662 + struct ttm_bo_device *bdev = bo->bdev; 1661 1663 void *sync_obj; 1662 1664 void *sync_obj_arg; 1663 1665 int ret = 0; ··· 1672 1672 void *tmp_obj = bo->sync_obj; 1673 1673 bo->sync_obj = NULL; 1674 1674 clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); 1675 - spin_unlock(&bo->lock); 1675 + spin_unlock(&bdev->fence_lock); 1676 1676 driver->sync_obj_unref(&tmp_obj); 1677 - spin_lock(&bo->lock); 1677 + spin_lock(&bdev->fence_lock); 1678 1678 continue; 1679 1679 } 1680 1680 ··· 1683 1683 1684 1684 sync_obj = driver->sync_obj_ref(bo->sync_obj); 1685 1685 sync_obj_arg = bo->sync_obj_arg; 1686 
- spin_unlock(&bo->lock); 1686 + spin_unlock(&bdev->fence_lock); 1687 1687 ret = driver->sync_obj_wait(sync_obj, sync_obj_arg, 1688 1688 lazy, interruptible); 1689 1689 if (unlikely(ret != 0)) { 1690 1690 driver->sync_obj_unref(&sync_obj); 1691 - spin_lock(&bo->lock); 1691 + spin_lock(&bdev->fence_lock); 1692 1692 return ret; 1693 1693 } 1694 - spin_lock(&bo->lock); 1694 + spin_lock(&bdev->fence_lock); 1695 1695 if (likely(bo->sync_obj == sync_obj && 1696 1696 bo->sync_obj_arg == sync_obj_arg)) { 1697 1697 void *tmp_obj = bo->sync_obj; 1698 1698 bo->sync_obj = NULL; 1699 1699 clear_bit(TTM_BO_PRIV_FLAG_MOVING, 1700 1700 &bo->priv_flags); 1701 - spin_unlock(&bo->lock); 1701 + spin_unlock(&bdev->fence_lock); 1702 1702 driver->sync_obj_unref(&sync_obj); 1703 1703 driver->sync_obj_unref(&tmp_obj); 1704 - spin_lock(&bo->lock); 1704 + spin_lock(&bdev->fence_lock); 1705 1705 } else { 1706 - spin_unlock(&bo->lock); 1706 + spin_unlock(&bdev->fence_lock); 1707 1707 driver->sync_obj_unref(&sync_obj); 1708 - spin_lock(&bo->lock); 1708 + spin_lock(&bdev->fence_lock); 1709 1709 } 1710 1710 } 1711 1711 return 0; ··· 1714 1714 1715 1715 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) 1716 1716 { 1717 + struct ttm_bo_device *bdev = bo->bdev; 1717 1718 int ret = 0; 1718 1719 1719 1720 /* ··· 1724 1723 ret = ttm_bo_reserve(bo, true, no_wait, false, 0); 1725 1724 if (unlikely(ret != 0)) 1726 1725 return ret; 1727 - spin_lock(&bo->lock); 1726 + spin_lock(&bdev->fence_lock); 1728 1727 ret = ttm_bo_wait(bo, false, true, no_wait); 1729 - spin_unlock(&bo->lock); 1728 + spin_unlock(&bdev->fence_lock); 1730 1729 if (likely(ret == 0)) 1731 1730 atomic_inc(&bo->cpu_writers); 1732 1731 ttm_bo_unreserve(bo); ··· 1798 1797 * Wait for GPU, then move to system cached. 
1799 1798 */ 1800 1799 1801 - spin_lock(&bo->lock); 1800 + spin_lock(&bo->bdev->fence_lock); 1802 1801 ret = ttm_bo_wait(bo, false, false, false); 1803 - spin_unlock(&bo->lock); 1802 + spin_unlock(&bo->bdev->fence_lock); 1804 1803 1805 1804 if (unlikely(ret != 0)) 1806 1805 goto out;
+3 -4
drivers/gpu/drm/ttm/ttm_bo_util.c
··· 337 337 * TODO: Explicit member copy would probably be better here. 338 338 */ 339 339 340 - spin_lock_init(&fbo->lock); 341 340 init_waitqueue_head(&fbo->event_queue); 342 341 INIT_LIST_HEAD(&fbo->ddestroy); 343 342 INIT_LIST_HEAD(&fbo->lru); ··· 519 520 struct ttm_buffer_object *ghost_obj; 520 521 void *tmp_obj = NULL; 521 522 522 - spin_lock(&bo->lock); 523 + spin_lock(&bdev->fence_lock); 523 524 if (bo->sync_obj) { 524 525 tmp_obj = bo->sync_obj; 525 526 bo->sync_obj = NULL; ··· 528 529 bo->sync_obj_arg = sync_obj_arg; 529 530 if (evict) { 530 531 ret = ttm_bo_wait(bo, false, false, false); 531 - spin_unlock(&bo->lock); 532 + spin_unlock(&bdev->fence_lock); 532 533 if (tmp_obj) 533 534 driver->sync_obj_unref(&tmp_obj); 534 535 if (ret) ··· 551 552 */ 552 553 553 554 set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); 554 - spin_unlock(&bo->lock); 555 + spin_unlock(&bdev->fence_lock); 555 556 if (tmp_obj) 556 557 driver->sync_obj_unref(&tmp_obj); 557 558
+3 -3
drivers/gpu/drm/ttm/ttm_bo_vm.c
··· 118 118 * move. 119 119 */ 120 120 121 - spin_lock(&bo->lock); 121 + spin_lock(&bdev->fence_lock); 122 122 if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { 123 123 ret = ttm_bo_wait(bo, false, true, false); 124 - spin_unlock(&bo->lock); 124 + spin_unlock(&bdev->fence_lock); 125 125 if (unlikely(ret != 0)) { 126 126 retval = (ret != -ERESTARTSYS) ? 127 127 VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; 128 128 goto out_unlock; 129 129 } 130 130 } else 131 - spin_unlock(&bo->lock); 131 + spin_unlock(&bdev->fence_lock); 132 132 133 133 134 134 ret = ttm_mem_io_reserve(bdev, &bo->mem);
+4 -3
drivers/gpu/drm/ttm/ttm_execbuf_util.c
··· 203 203 204 204 list_for_each_entry(entry, list, head) { 205 205 struct ttm_buffer_object *bo = entry->bo; 206 - struct ttm_bo_driver *driver = bo->bdev->driver; 206 + struct ttm_bo_device *bdev = bo->bdev; 207 + struct ttm_bo_driver *driver = bdev->driver; 207 208 void *old_sync_obj; 208 209 209 - spin_lock(&bo->lock); 210 + spin_lock(&bdev->fence_lock); 210 211 old_sync_obj = bo->sync_obj; 211 212 bo->sync_obj = driver->sync_obj_ref(sync_obj); 212 213 bo->sync_obj_arg = entry->new_sync_obj_arg; 213 - spin_unlock(&bo->lock); 214 + spin_unlock(&bdev->fence_lock); 214 215 ttm_bo_unreserve(bo); 215 216 entry->reserved = false; 216 217 if (old_sync_obj)
+2 -4
include/drm/ttm/ttm_bo_api.h
··· 154 154 * keeps one refcount. When this refcount reaches zero, 155 155 * the object is destroyed. 156 156 * @event_queue: Queue for processes waiting on buffer object status change. 157 - * @lock: spinlock protecting mostly synchronization members. 158 157 * @mem: structure describing current placement. 159 158 * @persistant_swap_storage: Usually the swap storage is deleted for buffers 160 159 * pinned in physical memory. If this behaviour is not desired, this member ··· 212 213 struct kref kref; 213 214 struct kref list_kref; 214 215 wait_queue_head_t event_queue; 215 - spinlock_t lock; 216 216 217 217 /** 218 218 * Members protected by the bo::reserved lock. ··· 246 248 atomic_t reserved; 247 249 248 250 /** 249 - * Members protected by the bo::lock 251 + * Members protected by struct buffer_object_device::fence_lock 250 252 * In addition, setting sync_obj to anything else 251 253 * than NULL requires bo::reserved to be held. This allows for 252 - * checking NULL while reserved but not holding bo::lock. 254 + * checking NULL while reserved but not holding the mentioned lock. 253 255 */ 254 256 255 257 void *sync_obj_arg;
+3
include/drm/ttm/ttm_bo_driver.h
··· 510 510 * 511 511 * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. 512 512 * @man: An array of mem_type_managers. 513 + * @fence_lock: Protects the synchronizing members on *all* bos belonging 514 + * to this device. 513 515 * @addr_space_mm: Range manager for the device address space. 514 516 * lru_lock: Spinlock that protects the buffer+device lru lists and 515 517 * ddestroy lists. ··· 533 531 struct ttm_bo_driver *driver; 534 532 rwlock_t vm_lock; 535 533 struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; 534 + spinlock_t fence_lock; 536 535 /* 537 536 * Protected by the vm lock. 538 537 */