Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: handle IH ring1 overflow

IH ring1 is used to process GPU retry faults; overflow is enabled to
drain retry faults because we want to receive other interrupts while
handling retry faults, to recover the range. There is no overflow flag
set when wptr passes rptr. Use the timestamps of rptr and wptr to handle
overflow and drain retry faults.

If the fault timestamp goes backward, the fault is filtered and should
not be processed. Draining faults is finished if processed_timestamp is
equal to or larger than the checkpoint timestamp.

Add the amdgpu_ih_funcs interface decode_iv_ts so that different chips
can extract the timestamp from an IV entry with differing IV sizes and
timestamp offsets. amdgpu_ih_decode_iv_ts_helper is used for vega10,
vega20 and navi10.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Philip Yang and committed by
Alex Deucher
3c2d6ea2 232d1d43

+60 -37
+7 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 350 350 * amdgpu_gmc_filter_faults - filter VM faults 351 351 * 352 352 * @adev: amdgpu device structure 353 + * @ih: interrupt ring that the fault received from 353 354 * @addr: address of the VM fault 354 355 * @pasid: PASID of the process causing the fault 355 356 * @timestamp: timestamp of the fault ··· 359 358 * True if the fault was filtered and should not be processed further. 360 359 * False if the fault is a new one and needs to be handled. 361 360 */ 362 - bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, 361 + bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, 362 + struct amdgpu_ih_ring *ih, uint64_t addr, 363 363 uint16_t pasid, uint64_t timestamp) 364 364 { 365 365 struct amdgpu_gmc *gmc = &adev->gmc; 366 366 uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid); 367 367 struct amdgpu_gmc_fault *fault; 368 368 uint32_t hash; 369 + 370 + /* Stale retry fault if timestamp goes backward */ 371 + if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp)) 372 + return true; 369 373 370 374 /* If we don't have space left in the ring buffer return immediately */ 371 375 stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
··· 316 316 struct amdgpu_gmc *mc); 317 317 void amdgpu_gmc_agp_location(struct amdgpu_device *adev, 318 318 struct amdgpu_gmc *mc); 319 - bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, 319 + bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, 320 + struct amdgpu_ih_ring *ih, uint64_t addr, 320 321 uint16_t pasid, uint64_t timestamp); 321 322 void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, 322 323 uint16_t pasid);
+25 -30
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
··· 164 164 } 165 165 } 166 166 167 - /* Waiter helper that checks current rptr matches or passes checkpoint wptr */ 168 - static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, 169 - struct amdgpu_ih_ring *ih, 170 - uint32_t checkpoint_wptr, 171 - uint32_t *prev_rptr) 172 - { 173 - uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask); 174 - 175 - /* rptr has wrapped. */ 176 - if (cur_rptr < *prev_rptr) 177 - cur_rptr += ih->ptr_mask + 1; 178 - *prev_rptr = cur_rptr; 179 - 180 - /* check ring is empty to workaround missing wptr overflow flag */ 181 - return cur_rptr >= checkpoint_wptr || 182 - (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); 183 - } 184 - 185 167 /** 186 - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint 168 + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint 187 169 * 188 170 * @adev: amdgpu_device pointer 189 171 * @ih: ih ring to process 190 172 * 191 173 * Used to ensure ring has processed IVs up to the checkpoint write pointer. 192 174 */ 193 - int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, 175 + int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, 194 176 struct amdgpu_ih_ring *ih) 195 177 { 196 - uint32_t checkpoint_wptr, rptr; 178 + uint32_t checkpoint_wptr; 179 + uint64_t checkpoint_ts; 180 + long timeout = HZ; 197 181 198 182 if (!ih->enabled || adev->shutdown) 199 183 return -ENODEV; 200 184 201 185 checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); 202 - /* Order wptr with rptr. */ 186 + /* Order wptr with ring data. */ 203 187 rmb(); 204 - rptr = READ_ONCE(ih->rptr); 188 + checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); 205 189 206 - /* wptr has wrapped. 
*/ 207 - if (rptr > checkpoint_wptr) 208 - checkpoint_wptr += ih->ptr_mask + 1; 209 - 210 - return wait_event_interruptible(ih->wait_process, 211 - amdgpu_ih_has_checkpoint_processed(adev, ih, 212 - checkpoint_wptr, &rptr)); 190 + return wait_event_interruptible_timeout(ih->wait_process, 191 + !amdgpu_ih_ts_after(ih->processed_timestamp, checkpoint_ts), 192 + timeout); 213 193 } 214 194 215 195 /** ··· 278 298 279 299 /* wptr/rptr are in bytes! */ 280 300 ih->rptr += 32; 301 + } 302 + 303 + uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, 304 + signed int offset) 305 + { 306 + uint32_t iv_size = 32; 307 + uint32_t ring_index; 308 + uint32_t dw1, dw2; 309 + 310 + rptr += iv_size * offset; 311 + ring_index = (rptr & ih->ptr_mask) >> 2; 312 + 313 + dw1 = le32_to_cpu(ih->ring[ring_index + 1]); 314 + dw2 = le32_to_cpu(ih->ring[ring_index + 2]); 315 + return dw1 | ((u64)(dw2 & 0xffff) << 32); 281 316 }
+14 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
··· 68 68 69 69 /* For waiting on IH processing at checkpoint. */ 70 70 wait_queue_head_t wait_process; 71 + uint64_t processed_timestamp; 71 72 }; 73 + 74 + /* return true if time stamp t2 is after t1 with 48bit wrap around */ 75 + #define amdgpu_ih_ts_after(t1, t2) \ 76 + (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL) 72 77 73 78 /* provided by the ih block */ 74 79 struct amdgpu_ih_funcs { ··· 81 76 u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); 82 77 void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, 83 78 struct amdgpu_iv_entry *entry); 79 + uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr, 80 + signed int offset); 84 81 void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); 85 82 }; 86 83 87 84 #define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih)) 88 85 #define amdgpu_ih_decode_iv(adev, iv) \ 89 86 (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv)) 87 + #define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \ 88 + (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 
0 : \ 89 + (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset))) 90 90 #define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih)) 91 91 92 92 int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ··· 99 89 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); 100 90 void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, 101 91 unsigned int num_dw); 102 - int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, 103 - struct amdgpu_ih_ring *ih); 92 + int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, 93 + struct amdgpu_ih_ring *ih); 104 94 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); 105 95 void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, 106 96 struct amdgpu_ih_ring *ih, 107 97 struct amdgpu_iv_entry *entry); 98 + uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, 99 + signed int offset); 108 100 #endif
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
··· 528 528 /* Send it to amdkfd as well if it isn't already handled */ 529 529 if (!handled) 530 530 amdgpu_amdkfd_interrupt(adev, entry.iv_entry); 531 + 532 + dev_WARN_ONCE(adev->dev, ih->processed_timestamp == entry.timestamp, 533 + "IH timestamps are not unique"); 534 + 535 + if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp)) 536 + ih->processed_timestamp = entry.timestamp; 531 537 } 532 538 533 539 /**
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 107 107 108 108 /* Process it onyl if it's the first fault for this address */ 109 109 if (entry->ih != &adev->irq.ih_soft && 110 - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, 110 + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, 111 111 entry->timestamp)) 112 112 return 1; 113 113
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 523 523 524 524 /* Process it onyl if it's the first fault for this address */ 525 525 if (entry->ih != &adev->irq.ih_soft && 526 - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, 526 + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, 527 527 entry->timestamp)) 528 528 return 1; 529 529
+1
drivers/gpu/drm/amd/amdgpu/navi10_ih.c
··· 716 716 static const struct amdgpu_ih_funcs navi10_ih_funcs = { 717 717 .get_wptr = navi10_ih_get_wptr, 718 718 .decode_iv = amdgpu_ih_decode_iv_helper, 719 + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, 719 720 .set_rptr = navi10_ih_set_rptr 720 721 }; 721 722
+1
drivers/gpu/drm/amd/amdgpu/vega10_ih.c
··· 640 640 static const struct amdgpu_ih_funcs vega10_ih_funcs = { 641 641 .get_wptr = vega10_ih_get_wptr, 642 642 .decode_iv = amdgpu_ih_decode_iv_helper, 643 + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, 643 644 .set_rptr = vega10_ih_set_rptr 644 645 }; 645 646
+1
drivers/gpu/drm/amd/amdgpu/vega20_ih.c
··· 688 688 static const struct amdgpu_ih_funcs vega20_ih_funcs = { 689 689 .get_wptr = vega20_ih_get_wptr, 690 690 .decode_iv = amdgpu_ih_decode_iv_helper, 691 + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, 691 692 .set_rptr = vega20_ih_set_rptr 692 693 }; 693 694
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 1974 1974 1975 1975 pr_debug("drain retry fault gpu %d svms %p\n", i, svms); 1976 1976 1977 - amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev, 1977 + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, 1978 1978 &pdd->dev->adev->irq.ih1); 1979 1979 pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); 1980 1980 }