Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

nouveau: push event block/allowing out of the fence context

There is a deadlock between the irq and fctx locks:
the irq handling takes the irq lock, then the fctx lock;
the fence signalling takes the fctx lock, then the irq lock.

This splits the fence signalling path so that the code which takes
the irq lock runs in a separate work queue.

This seems to fix crashes/hangs when using nouveau gsp with
i915 primary GPU.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-11-airlied@gmail.com

authored by

Dave Airlie and committed by
Dave Airlie
eacabb54 9c9dd22b

+27 -6
+23 -5
drivers/gpu/drm/nouveau/nouveau_fence.c
··· 62 62 if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { 63 63 struct nouveau_fence_chan *fctx = nouveau_fctx(fence); 64 64 65 - if (!--fctx->notify_ref) 65 + if (atomic_dec_and_test(&fctx->notify_ref)) 66 66 drop = 1; 67 67 } 68 68 ··· 103 103 void 104 104 nouveau_fence_context_del(struct nouveau_fence_chan *fctx) 105 105 { 106 + cancel_work_sync(&fctx->allow_block_work); 106 107 nouveau_fence_context_kill(fctx, 0); 107 108 nvif_event_dtor(&fctx->event); 108 109 fctx->dead = 1; ··· 168 167 return ret; 169 168 } 170 169 170 + static void 171 + nouveau_fence_work_allow_block(struct work_struct *work) 172 + { 173 + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, 174 + allow_block_work); 175 + 176 + if (atomic_read(&fctx->notify_ref) == 0) 177 + nvif_event_block(&fctx->event); 178 + else 179 + nvif_event_allow(&fctx->event); 180 + } 181 + 171 182 void 172 183 nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) 173 184 { ··· 191 178 } args; 192 179 int ret; 193 180 181 + INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); 194 182 INIT_LIST_HEAD(&fctx->flip); 195 183 INIT_LIST_HEAD(&fctx->pending); 196 184 spin_lock_init(&fctx->lock); ··· 535 521 struct nouveau_fence *fence = from_fence(f); 536 522 struct nouveau_fence_chan *fctx = nouveau_fctx(fence); 537 523 bool ret; 524 + bool do_work; 538 525 539 - if (!fctx->notify_ref++) 540 - nvif_event_allow(&fctx->event); 526 + if (atomic_inc_return(&fctx->notify_ref) == 0) 527 + do_work = true; 541 528 542 529 ret = nouveau_fence_no_signaling(f); 543 530 if (ret) 544 531 set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); 545 - else if (!--fctx->notify_ref) 546 - nvif_event_block(&fctx->event); 532 + else if (atomic_dec_and_test(&fctx->notify_ref)) 533 + do_work = true; 534 + 535 + if (do_work) 536 + schedule_work(&fctx->allow_block_work); 547 537 548 538 return ret; 549 539 }
+4 -1
drivers/gpu/drm/nouveau/nouveau_fence.h
··· 3 3 #define __NOUVEAU_FENCE_H__ 4 4 5 5 #include <linux/dma-fence.h> 6 + #include <linux/workqueue.h> 6 7 #include <nvif/event.h> 7 8 8 9 struct nouveau_drm; ··· 46 45 char name[32]; 47 46 48 47 struct nvif_event event; 49 - int notify_ref, dead, killed; 48 + struct work_struct allow_block_work; 49 + atomic_t notify_ref; 50 + int dead, killed; 50 51 }; 51 52 52 53 struct nouveau_fence_priv {