
drm/radeon: use common fence implementation for fences, v4

Changes since v1:
- Kill the sw interrupt dance, add and use
radeon_irq_kms_sw_irq_get_delayed instead.
- Change the custom wait function; lockdep complained about it, since
  holding exclusive_lock in the wait function might cause deadlocks.
Instead do all the processing in .enable_signaling, and wait
on the global fence_queue to pick up gpu resets.
- Process all fences in radeon_gpu_reset after reset to close a race
with the trylock in enable_signaling.
Changes since v2:
- Small changes to work with the rewritten lockup recovery patches.
Changes since v3:
- Call radeon_fence_schedule_check when exclusive_lock cannot be
acquired to always cause a wake up.
- Reset irqs from hangup check.
- Drop reading seqno in the callback, use cached value.
- Fix indentation in radeon_fence_default_wait.
- Add a radeon_test_signaled function, drop a few test_bit calls.
- Make to_radeon_fence global.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
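
For background: once struct radeon_fence is backed by the common struct fence,
any kernel code holding a fence pointer can wait on it through the shared API
without knowing it came from radeon. A minimal consumer-side sketch, assuming
only the common fence API (the function name is hypothetical, not part of this
patch):

#include <linux/fence.h>
#include <linux/jiffies.h>

static int wait_on_any_fence(struct fence *f)
{
	signed long r;

	/* Interruptible wait with a 1 second timeout; the fence core
	 * invokes ->enable_signaling (and ->wait, if provided) for us. */
	r = fence_wait_timeout(f, true, HZ);
	if (r == 0)
		return -ETIMEDOUT;	/* timed out, not signaled */
	if (r < 0)
		return r;		/* -ERESTARTSYS, or radeon's -EDEADLK on lockup */
	return 0;			/* signaled */
}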

Total: +235 -29

drivers/gpu/drm/radeon/radeon.h (+19 -4)
···
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
 #include <linux/hashtable.h>
+#include <linux/fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
···
 	/* sync_seq is protected by ring emission lock */
 	uint64_t sync_seq[RADEON_NUM_RINGS];
 	atomic64_t last_seq;
-	bool initialized;
+	bool initialized, delayed_irq;
 	struct delayed_work lockup_work;
 };
 
 struct radeon_fence {
+	struct fence base;
+
 	struct radeon_device *rdev;
-	struct kref kref;
-	/* protected by radeon_fence.lock */
 	uint64_t seq;
 	/* RB, DMA, etc. */
 	unsigned ring;
+
+	wait_queue_t fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
···
 int radeon_irq_kms_init(struct radeon_device *rdev);
 void radeon_irq_kms_fini(struct radeon_device *rdev);
 void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
 void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
···
 	struct radeon_mman mman;
 	struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
 	wait_queue_head_t fence_queue;
+	unsigned fence_context;
 	struct mutex ring_lock;
 	struct radeon_ring ring[RADEON_NUM_RINGS];
 	bool ib_pool_ready;
···
 /*
  * Cast helper
  */
-#define to_radeon_fence(p) ((struct radeon_fence *)(p))
+extern const struct fence_ops radeon_fence_ops;
+
+static inline struct radeon_fence *to_radeon_fence(struct fence *f)
+{
+	struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
+
+	if (__f->base.ops == &radeon_fence_ops)
+		return __f;
+
+	return NULL;
+}
 
 /*
  * Registers read & write functions.
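The ops check in to_radeon_fence() above is what makes the cast safe once
fences from other drivers can reach radeon code. A sketch of the intended
call pattern (radeon_fence_describe() is a hypothetical illustration, not
part of the patch):

static void radeon_fence_describe(struct fence *f)
{
	struct radeon_fence *rfence = to_radeon_fence(f);

	if (!rfence) {
		/* foreign fence: only the generic struct fence fields apply */
		pr_info("foreign fence, context %u\n", f->context);
		return;
	}

	/* ops matched, so radeon-specific members are safe to read */
	pr_info("radeon fence, ring %u, seq %llu\n",
		rfence->ring, (unsigned long long)rfence->seq);
}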
drivers/gpu/drm/radeon/radeon_device.c (+1 -0)
···
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		rdev->ring[i].idx = i;
 	}
+	rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
 
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
 		 radeon_family_name[rdev->family], pdev->vendor, pdev->device,
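fence_context_alloc() reserves RADEON_NUM_RINGS consecutive timeline ids here,
one per ring: fences sharing a context form a single timeline ordered by seqno,
while fences on different rings are never compared against each other. A small
sketch of how the per-ring context then feeds fence_init() (mirroring the emit
path in radeon_fence.c below):

/* context is stable for the ring's lifetime; seq increases per fence */
unsigned context = rdev->fence_context + ring;	/* ring < RADEON_NUM_RINGS */

fence_init(&(*fence)->base, &radeon_fence_ops,
	   &rdev->fence_queue.lock, context, seq);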
drivers/gpu/drm/radeon/radeon_fence.c (+200 -25)
···
 		      struct radeon_fence **fence,
 		      int ring)
 {
+	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
+
 	/* we are protected by the ring emission mutex */
 	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
-	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+	(*fence)->seq = seq;
 	(*fence)->ring = ring;
+	fence_init(&(*fence)->base, &radeon_fence_ops,
+		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
 	radeon_fence_schedule_check(rdev, ring);
+	return 0;
+}
+
+/**
+ * radeon_fence_check_signaled - callback from fence_queue
+ *
+ * this function is called with fence_queue lock held, which is also used
+ * for the fence locking itself, so unlocked variants are used for
+ * fence_signal, and remove_wait_queue.
+ */
+static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct radeon_fence *fence;
+	u64 seq;
+
+	fence = container_of(wait, struct radeon_fence, fence_wake);
+
+	/*
+	 * We cannot use radeon_fence_process here because we're already
+	 * in the waitqueue, in a call from wake_up_all.
+	 */
+	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
+	if (seq >= fence->seq) {
+		int ret = fence_signal_locked(&fence->base);
+
+		if (!ret)
+			FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		else
+			FENCE_TRACE(&fence->base, "was already signaled\n");
+
+		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
+		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
+		fence_put(&fence->base);
+	} else
+		FENCE_TRACE(&fence->base, "pending\n");
 	return 0;
 }
···
 		return;
 	}
 
+	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
+		unsigned long irqflags;
+
+		fence_drv->delayed_irq = false;
+		spin_lock_irqsave(&rdev->irq.lock, irqflags);
+		radeon_irq_set(rdev);
+		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+	}
+
 	if (radeon_fence_activity(rdev, ring))
 		wake_up_all(&rdev->fence_queue);
 
···
 }
 
 /**
- * radeon_fence_destroy - destroy a fence
- *
- * @kref: fence kref
- *
- * Frees the fence object (all asics).
- */
-static void radeon_fence_destroy(struct kref *kref)
-{
-	struct radeon_fence *fence;
-
-	fence = container_of(kref, struct radeon_fence, kref);
-	kfree(fence);
-}
-
-/**
  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
  *
  * @rdev: radeon device pointer
···
 	return false;
 }
 
+static bool radeon_fence_is_signaled(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+	unsigned ring = fence->ring;
+	u64 seq = fence->seq;
+
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+		return true;
+	}
+
+	if (down_read_trylock(&rdev->exclusive_lock)) {
+		radeon_fence_process(rdev, ring);
+		up_read(&rdev->exclusive_lock);
+
+		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/**
+ * radeon_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool radeon_fence_enable_signaling(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+
+	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
+		return false;
+
+	if (down_read_trylock(&rdev->exclusive_lock)) {
+		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+
+		if (radeon_fence_activity(rdev, fence->ring))
+			wake_up_all_locked(&rdev->fence_queue);
+
+		/* did fence get signaled after we enabled the sw irq? */
+		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
+			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+			up_read(&rdev->exclusive_lock);
+			return false;
+		}
+
+		up_read(&rdev->exclusive_lock);
+	} else {
+		/* we're probably in a lockup, lets not fiddle too much */
+		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
+			rdev->fence_drv[fence->ring].delayed_irq = true;
+		radeon_fence_schedule_check(rdev, fence->ring);
+	}
+
+	fence->fence_wake.flags = 0;
+	fence->fence_wake.private = NULL;
+	fence->fence_wake.func = radeon_fence_check_signaled;
+	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
+	fence_get(f);
+
+	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
+	return true;
+}
+
 /**
  * radeon_fence_signaled - check if a fence has signaled
  *
···
 {
 	if (!fence)
 		return true;
-	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
+
+	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
+		int ret;
+
+		ret = fence_signal(&fence->base);
+		if (!ret)
+			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
 		return true;
+	}
 	return false;
 }
···
 	uint64_t seq[RADEON_NUM_RINGS] = {};
 	long r;
 
-	if (fence == NULL) {
-		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return -EINVAL;
-	}
-
 	seq[fence->ring] = fence->seq;
 	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
 	if (r < 0) {
 		return r;
 	}
 
+	r = fence_signal(&fence->base);
+	if (!r)
+		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 	return 0;
 }
···
  */
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
-	kref_get(&fence->kref);
+	fence_get(&fence->base);
 	return fence;
 }
···
 
 	*fence = NULL;
 	if (tmp) {
-		kref_put(&tmp->kref, radeon_fence_destroy);
+		fence_put(&tmp->base);
 	}
 }
···
 	return 0;
 #endif
 }
+
+static const char *radeon_fence_get_driver_name(struct fence *fence)
+{
+	return "radeon";
+}
+
+static const char *radeon_fence_get_timeline_name(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	switch (fence->ring) {
+	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
+	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
+	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
+	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
+	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
+	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
+	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
+	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
+	default: WARN_ON_ONCE(1); return "radeon.unk";
+	}
+}
+
+static inline bool radeon_test_signaled(struct radeon_fence *fence)
+{
+	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
+}
+
+static signed long radeon_fence_default_wait(struct fence *f, bool intr,
+					     signed long t)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+	bool signaled;
+
+	fence_enable_sw_signaling(&fence->base);
+
+	/*
+	 * This function has to return -EDEADLK, but cannot hold
+	 * exclusive_lock during the wait because some callers
+	 * may already hold it. This means checking needs_reset without
+	 * lock, and not fiddling with any gpu internals.
+	 *
+	 * The callback installed with fence_enable_sw_signaling will
+	 * run before our wait_event_*timeout call, so we will see
+	 * both the signaled fence and the changes to needs_reset.
+	 */
+
+	if (intr)
+		t = wait_event_interruptible_timeout(rdev->fence_queue,
+			((signaled = radeon_test_signaled(fence)) ||
+			 rdev->needs_reset), t);
+	else
+		t = wait_event_timeout(rdev->fence_queue,
+			((signaled = radeon_test_signaled(fence)) ||
+			 rdev->needs_reset), t);
+
+	if (t > 0 && !signaled)
+		return -EDEADLK;
+	return t;
+}
+
+const struct fence_ops radeon_fence_ops = {
+	.get_driver_name = radeon_fence_get_driver_name,
+	.get_timeline_name = radeon_fence_get_timeline_name,
+	.enable_signaling = radeon_fence_enable_signaling,
+	.signaled = radeon_fence_is_signaled,
+	.wait = radeon_fence_default_wait,
+	.release = NULL,
+};
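Blocking waits are not the only consumer of this: fence_add_callback() runs
->enable_signaling under the fence lock (radeon's fence_queue lock here),
which is exactly why the patch arms its wait-queue entry there instead of in
a custom wait path. A hedged sketch of an asynchronous consumer (my_waiter
and my_wait_async are hypothetical, not part of the patch):

#include <linux/fence.h>
#include <linux/completion.h>

struct my_waiter {
	struct fence_cb cb;
	struct completion done;
};

static void my_fence_cb(struct fence *f, struct fence_cb *cb)
{
	struct my_waiter *w = container_of(cb, struct my_waiter, cb);

	complete(&w->done);	/* called once the fence signals */
}

static int my_wait_async(struct fence *f)
{
	struct my_waiter w;
	int r;

	init_completion(&w.done);
	r = fence_add_callback(f, &w.cb, my_fence_cb);
	if (r == -ENOENT)
		return 0;	/* fence was already signaled */
	if (r)
		return r;
	wait_for_completion(&w.done);
	return 0;
}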
drivers/gpu/drm/radeon/radeon_irq_kms.c (+15 -0)
···
 }
 
 /**
+ * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt you want to enable
+ *
+ * Enables the software interrupt for a specific ring (all asics).
+ * The software interrupt is generally used to signal a fence on
+ * a particular ring.
+ */
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
+{
+	return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1;
+}
+
+/**
  * radeon_irq_kms_sw_irq_put - disable software interrupt
  *
  * @rdev: radeon device pointer