Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/msm: Implement preemption for A5XX targets

Implement preemption for A5XX targets - this allows multiple
ringbuffers for different priorities with automatic preemption
of a lower priority ringbuffer if a higher one is ready.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>

authored by

Jordan Crouse and committed by
Rob Clark
b1fc2839 4d87fc32

+599 -20
+1
drivers/gpu/drm/msm/Makefile
··· 8 8 adreno/a4xx_gpu.o \ 9 9 adreno/a5xx_gpu.o \ 10 10 adreno/a5xx_power.o \ 11 + adreno/a5xx_preempt.o \ 11 12 hdmi/hdmi.o \ 12 13 hdmi/hdmi_audio.o \ 13 14 hdmi/hdmi_bridge.o \
+171 -5
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
··· 113 113 return ret; 114 114 } 115 115 116 + static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 117 + { 118 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 119 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 120 + uint32_t wptr; 121 + unsigned long flags; 122 + 123 + spin_lock_irqsave(&ring->lock, flags); 124 + 125 + /* Copy the shadow to the actual register */ 126 + ring->cur = ring->next; 127 + 128 + /* Make sure to wrap wptr if we need to */ 129 + wptr = get_wptr(ring); 130 + 131 + spin_unlock_irqrestore(&ring->lock, flags); 132 + 133 + /* Make sure everything is posted before making a decision */ 134 + mb(); 135 + 136 + /* Update HW if this is the current ring and we are not in preempt */ 137 + if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) 138 + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); 139 + } 140 + 116 141 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 117 142 struct msm_file_private *ctx) 118 143 { 144 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 145 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 119 146 struct msm_drm_private *priv = gpu->dev->dev_private; 120 147 struct msm_ringbuffer *ring = submit->ring; 121 148 unsigned int i, ibs = 0; 122 149 150 + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 151 + OUT_RING(ring, 0x02); 152 + 153 + /* Turn off protected mode to write to special registers */ 154 + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 155 + OUT_RING(ring, 0); 156 + 157 + /* Set the save preemption record for the ring/command */ 158 + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 159 + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 160 + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 161 + 162 + /* Turn back on protected mode */ 163 + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 164 + OUT_RING(ring, 1); 165 + 166 + /* Enable local preemption for finegrain preemption */ 167 + OUT_PKT7(ring, 
CP_PREEMPT_ENABLE_GLOBAL, 1); 168 + OUT_RING(ring, 0x02); 169 + 170 + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ 171 + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 172 + OUT_RING(ring, 0x02); 173 + 174 + /* Submit the commands */ 123 175 for (i = 0; i < submit->nr_cmds; i++) { 124 176 switch (submit->cmd[i].type) { 125 177 case MSM_SUBMIT_CMD_IB_TARGET_BUF: ··· 189 137 } 190 138 } 191 139 140 + /* 141 + * Write the render mode to NULL (0) to indicate to the CP that the IBs 142 + * are done rendering - otherwise a lucky preemption would start 143 + * replaying from the last checkpoint 144 + */ 145 + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); 146 + OUT_RING(ring, 0); 147 + OUT_RING(ring, 0); 148 + OUT_RING(ring, 0); 149 + OUT_RING(ring, 0); 150 + OUT_RING(ring, 0); 151 + 152 + /* Turn off IB level preemptions */ 153 + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 154 + OUT_RING(ring, 0x01); 155 + 156 + /* Write the fence to the scratch register */ 192 157 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); 193 158 OUT_RING(ring, submit->seqno); 194 159 160 + /* 161 + * Execute a CACHE_FLUSH_TS event. This will ensure that the 162 + * timestamp is written to the memory and then triggers the interrupt 163 + */ 195 164 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 196 165 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); 197 166 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 198 167 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 199 168 OUT_RING(ring, submit->seqno); 200 169 201 - gpu->funcs->flush(gpu, ring); 170 + /* Yield the floor on command completion */ 171 + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 172 + /* 173 + * If dword[2:1] are non zero, they specify an address for the CP to 174 + * write the value of dword[3] to on preemption complete. 
Write 0 to 175 + * skip the write 176 + */ 177 + OUT_RING(ring, 0x00); 178 + OUT_RING(ring, 0x00); 179 + /* Data value - not used if the address above is 0 */ 180 + OUT_RING(ring, 0x01); 181 + /* Set bit 0 to trigger an interrupt on preempt complete */ 182 + OUT_RING(ring, 0x01); 183 + 184 + a5xx_flush(gpu, ring); 185 + 186 + /* Check to see if we need to start preemption */ 187 + a5xx_preempt_trigger(gpu); 202 188 } 203 189 204 190 static const struct { ··· 387 297 return a5xx_idle(gpu, ring) ? 0 : -EINVAL; 388 298 } 389 299 300 + static int a5xx_preempt_start(struct msm_gpu *gpu) 301 + { 302 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 303 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 304 + struct msm_ringbuffer *ring = gpu->rb[0]; 305 + 306 + if (gpu->nr_rings == 1) 307 + return 0; 308 + 309 + /* Turn off protected mode to write to special registers */ 310 + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 311 + OUT_RING(ring, 0); 312 + 313 + /* Set the save preemption record for the ring/command */ 314 + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 315 + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); 316 + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); 317 + 318 + /* Turn back on protected mode */ 319 + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 320 + OUT_RING(ring, 1); 321 + 322 + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 323 + OUT_RING(ring, 0x00); 324 + 325 + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); 326 + OUT_RING(ring, 0x01); 327 + 328 + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 329 + OUT_RING(ring, 0x01); 330 + 331 + /* Yield the floor on command completion */ 332 + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 333 + OUT_RING(ring, 0x00); 334 + OUT_RING(ring, 0x00); 335 + OUT_RING(ring, 0x01); 336 + OUT_RING(ring, 0x01); 337 + 338 + gpu->funcs->flush(gpu, ring); 339 + 340 + return a5xx_idle(gpu, ring) ? 
0 : -EINVAL; 341 + } 342 + 343 + 390 344 static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, 391 345 const struct firmware *fw, u64 *iova) 392 346 { ··· 546 412 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ 547 413 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 548 414 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ 415 + A5XX_RBBM_INT_0_MASK_CP_SW | \ 549 416 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 550 417 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 551 418 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) ··· 691 556 if (ret) 692 557 return ret; 693 558 559 + a5xx_preempt_hw_init(gpu); 560 + 694 561 a5xx_gpmu_ucode_init(gpu); 695 562 696 563 ret = a5xx_ucode_init(gpu); ··· 747 610 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); 748 611 } 749 612 613 + /* Last step - yield the ringbuffer */ 614 + a5xx_preempt_start(gpu); 615 + 750 616 return 0; 751 617 } 752 618 ··· 779 639 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 780 640 781 641 DBG("%s", gpu->name); 642 + 643 + a5xx_preempt_fini(gpu); 782 644 783 645 if (a5xx_gpu->pm4_bo) { 784 646 if (a5xx_gpu->pm4_iova) ··· 819 677 820 678 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 821 679 { 680 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 681 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 682 + 683 + if (ring != a5xx_gpu->cur_ring) { 684 + WARN(1, "Tried to idle a non-current ringbuffer\n"); 685 + return false; 686 + } 687 + 822 688 /* wait for CP to drain ringbuffer: */ 823 689 if (!adreno_idle(gpu, ring)) 824 690 return false; ··· 1021 871 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) 1022 872 a5xx_gpmu_err_irq(gpu); 1023 873 1024 - if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) 874 + if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 875 + a5xx_preempt_trigger(gpu); 1025 876 msm_gpu_retire(gpu); 877 + } 878 + 879 + if (status & A5XX_RBBM_INT_0_MASK_CP_SW) 880 + a5xx_preempt_irq(gpu); 1026 881 1027 882 return IRQ_HANDLED; 1028 883 } ··· 1157 1002 } 1158 
1003 #endif 1159 1004 1005 + static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) 1006 + { 1007 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1008 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 1009 + 1010 + return a5xx_gpu->cur_ring; 1011 + } 1012 + 1160 1013 static const struct adreno_gpu_funcs funcs = { 1161 1014 .base = { 1162 1015 .get_param = adreno_get_param, ··· 1173 1010 .pm_resume = a5xx_pm_resume, 1174 1011 .recover = a5xx_recover, 1175 1012 .submit = a5xx_submit, 1176 - .flush = adreno_flush, 1177 - .active_ring = adreno_active_ring, 1013 + .flush = a5xx_flush, 1014 + .active_ring = a5xx_active_ring, 1178 1015 .irq = a5xx_irq, 1179 1016 .destroy = a5xx_destroy, 1180 1017 #ifdef CONFIG_DEBUG_FS ··· 1210 1047 1211 1048 a5xx_gpu->lm_leakage = 0x4E001A; 1212 1049 1213 - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); 1050 + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); 1214 1051 if (ret) { 1215 1052 a5xx_destroy(&(a5xx_gpu->base.base)); 1216 1053 return ERR_PTR(ret); ··· 1218 1055 1219 1056 if (gpu->aspace) 1220 1057 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); 1058 + 1059 + /* Set up the preemption specific bits and pieces for each ringbuffer */ 1060 + a5xx_preempt_init(gpu); 1221 1061 1222 1062 return gpu; 1223 1063 }
+106 -1
drivers/gpu/drm/msm/adreno/a5xx_gpu.h
··· 1 - /* Copyright (c) 2016 The Linux Foundation. All rights reserved. 1 + /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. 2 2 * 3 3 * This program is free software; you can redistribute it and/or modify 4 4 * it under the terms of the GNU General Public License version 2 and ··· 35 35 uint32_t gpmu_dwords; 36 36 37 37 uint32_t lm_leakage; 38 + 39 + struct msm_ringbuffer *cur_ring; 40 + struct msm_ringbuffer *next_ring; 41 + 42 + struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; 43 + struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; 44 + uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; 45 + 46 + atomic_t preempt_state; 47 + struct timer_list preempt_timer; 38 48 }; 39 49 40 50 #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) 51 + 52 + /* 53 + * In order to do lockless preemption we use a simple state machine to progress 54 + * through the process. 55 + * 56 + * PREEMPT_NONE - no preemption in progress. Next state START. 57 + * PREEMPT_START - The trigger is evaluating if preemption is possible. Next 58 + * states: TRIGGERED, NONE 59 + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next 60 + * state: NONE. 61 + * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next 62 + * states: FAULTED, PENDING 63 + * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger 64 + * recovery. Next state: N/A 65 + * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is 66 + * checking the success of the operation. Next state: FAULTED, NONE. 67 + */ 68 + 69 + enum preempt_state { 70 + PREEMPT_NONE = 0, 71 + PREEMPT_START, 72 + PREEMPT_ABORT, 73 + PREEMPT_TRIGGERED, 74 + PREEMPT_FAULTED, 75 + PREEMPT_PENDING, 76 + }; 77 + 78 + /* 79 + * struct a5xx_preempt_record is a shared buffer between the microcode and the 80 + * CPU to store the state for preemption. The record itself is much larger 81 + * (64k) but most of that is used by the CP for storage. 
82 + * 83 + * There is a preemption record assigned per ringbuffer. When the CPU triggers a 84 + * preemption, it fills out the record with the useful information (wptr, ring 85 + * base, etc) and the microcode uses that information to set up the CP following 86 + * the preemption. When a ring is switched out, the CP will save the ringbuffer 87 + * state back to the record. In this way, once the records are properly set up 88 + * the CPU can quickly switch back and forth between ringbuffers by only 89 + * updating a few registers (often only the wptr). 90 + * 91 + * These are the CPU aware registers in the record: 92 + * @magic: Must always be 0x27C4BAFC 93 + * @info: Type of the record - written 0 by the CPU, updated by the CP 94 + * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by 95 + * the CP 96 + * @cntl: Value of RB_CNTL written by CPU, save/restored by CP 97 + * @rptr: Value of RB_RPTR written by CPU, save/restored by CP 98 + * @wptr: Value of RB_WPTR written by CPU, save/restored by CP 99 + * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP 100 + * @rbase: Value of RB_BASE written by CPU, save/restored by CP 101 + * @counter: GPU address of the storage area for the performance counters 102 + */ 103 + struct a5xx_preempt_record { 104 + uint32_t magic; 105 + uint32_t info; 106 + uint32_t data; 107 + uint32_t cntl; 108 + uint32_t rptr; 109 + uint32_t wptr; 110 + uint64_t rptr_addr; 111 + uint64_t rbase; 112 + uint64_t counter; 113 + }; 114 + 115 + /* Magic identifier for the preemption record */ 116 + #define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL 117 + 118 + /* 119 + * Even though the structure above is only a few bytes, we need a full 64k to 120 + * store the entire preemption record from the CP 121 + */ 122 + #define A5XX_PREEMPT_RECORD_SIZE (64 * 1024) 123 + 124 + /* 125 + * The preemption counter block is a storage area for the value of the 126 + * preemption counters that are saved immediately before 
context switch. We 127 + * append it on to the end of the allocation for the preemption record. 128 + */ 129 + #define A5XX_PREEMPT_COUNTER_SIZE (16 * 4) 130 + 41 131 42 132 int a5xx_power_init(struct msm_gpu *gpu); 43 133 void a5xx_gpmu_ucode_init(struct msm_gpu *gpu); ··· 147 57 148 58 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); 149 59 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); 60 + 61 + void a5xx_preempt_init(struct msm_gpu *gpu); 62 + void a5xx_preempt_hw_init(struct msm_gpu *gpu); 63 + void a5xx_preempt_trigger(struct msm_gpu *gpu); 64 + void a5xx_preempt_irq(struct msm_gpu *gpu); 65 + void a5xx_preempt_fini(struct msm_gpu *gpu); 66 + 67 + /* Return true if we are in a preempt state */ 68 + static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) 69 + { 70 + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); 71 + 72 + return !(preempt_state == PREEMPT_NONE || 73 + preempt_state == PREEMPT_ABORT); 74 + } 150 75 151 76 #endif /* __A5XX_GPU_H__ */
+305
drivers/gpu/drm/msm/adreno/a5xx_preempt.c
··· 1 + /* Copyright (c) 2017 The Linux Foundation. All rights reserved. 2 + * 3 + * This program is free software; you can redistribute it and/or modify 4 + * it under the terms of the GNU General Public License version 2 and 5 + * only version 2 as published by the Free Software Foundation. 6 + * 7 + * This program is distributed in the hope that it will be useful, 8 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 + * GNU General Public License for more details. 11 + * 12 + */ 13 + 14 + #include "msm_gem.h" 15 + #include "a5xx_gpu.h" 16 + 17 + /* 18 + * Try to transition the preemption state from old to new. Return 19 + * true on success or false if the original state wasn't 'old' 20 + */ 21 + static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu, 22 + enum preempt_state old, enum preempt_state new) 23 + { 24 + enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state, 25 + old, new); 26 + 27 + return (cur == old); 28 + } 29 + 30 + /* 31 + * Force the preemption state to the specified state. This is used in cases 32 + * where the current state is known and won't change 33 + */ 34 + static inline void set_preempt_state(struct a5xx_gpu *gpu, 35 + enum preempt_state new) 36 + { 37 + /* 38 + * preempt_state may be read by other cores trying to trigger a 39 + * preemption or in the interrupt handler so barriers are needed 40 + * before... 41 + */ 42 + smp_mb__before_atomic(); 43 + atomic_set(&gpu->preempt_state, new); 44 + /* ... 
and after*/ 45 + smp_mb__after_atomic(); 46 + } 47 + 48 + /* Write the most recent wptr for the given ring into the hardware */ 49 + static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 50 + { 51 + unsigned long flags; 52 + uint32_t wptr; 53 + 54 + if (!ring) 55 + return; 56 + 57 + spin_lock_irqsave(&ring->lock, flags); 58 + wptr = get_wptr(ring); 59 + spin_unlock_irqrestore(&ring->lock, flags); 60 + 61 + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); 62 + } 63 + 64 + /* Return the highest priority ringbuffer with something in it */ 65 + static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) 66 + { 67 + unsigned long flags; 68 + int i; 69 + 70 + for (i = 0; i < gpu->nr_rings; i++) { 71 + bool empty; 72 + struct msm_ringbuffer *ring = gpu->rb[i]; 73 + 74 + spin_lock_irqsave(&ring->lock, flags); 75 + empty = (get_wptr(ring) == ring->memptrs->rptr); 76 + spin_unlock_irqrestore(&ring->lock, flags); 77 + 78 + if (!empty) 79 + return ring; 80 + } 81 + 82 + return NULL; 83 + } 84 + 85 + static void a5xx_preempt_timer(unsigned long data) 86 + { 87 + struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data; 88 + struct msm_gpu *gpu = &a5xx_gpu->base.base; 89 + struct drm_device *dev = gpu->dev; 90 + struct msm_drm_private *priv = dev->dev_private; 91 + 92 + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED)) 93 + return; 94 + 95 + dev_err(dev->dev, "%s: preemption timed out\n", gpu->name); 96 + queue_work(priv->wq, &gpu->recover_work); 97 + } 98 + 99 + /* Try to trigger a preemption switch */ 100 + void a5xx_preempt_trigger(struct msm_gpu *gpu) 101 + { 102 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 103 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 104 + unsigned long flags; 105 + struct msm_ringbuffer *ring; 106 + 107 + if (gpu->nr_rings == 1) 108 + return; 109 + 110 + /* 111 + * Try to start preemption by moving from NONE to START. 
If 112 + * unsuccessful, a preemption is already in flight 113 + */ 114 + if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START)) 115 + return; 116 + 117 + /* Get the next ring to preempt to */ 118 + ring = get_next_ring(gpu); 119 + 120 + /* 121 + * If no ring is populated or the highest priority ring is the current 122 + * one do nothing except to update the wptr to the latest and greatest 123 + */ 124 + if (!ring || (a5xx_gpu->cur_ring == ring)) { 125 + /* 126 + * It's possible that while a preemption request is in progress 127 + * from an irq context, a user context trying to submit might 128 + * fail to update the write pointer, because it determines 129 + * that the preempt state is not PREEMPT_NONE. 130 + * 131 + * Close the race by introducing an intermediate 132 + * state PREEMPT_ABORT to let the submit path 133 + * know that the ringbuffer is not going to change 134 + * and can safely update the write pointer. 135 + */ 136 + 137 + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); 138 + update_wptr(gpu, a5xx_gpu->cur_ring); 139 + set_preempt_state(a5xx_gpu, PREEMPT_NONE); 140 + return; 141 + } 142 + 143 + /* Make sure the wptr doesn't update while we're in motion */ 144 + spin_lock_irqsave(&ring->lock, flags); 145 + a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); 146 + spin_unlock_irqrestore(&ring->lock, flags); 147 + 148 + /* Set the address of the incoming preemption record */ 149 + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, 150 + REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, 151 + a5xx_gpu->preempt_iova[ring->id]); 152 + 153 + a5xx_gpu->next_ring = ring; 154 + 155 + /* Start a timer to catch a stuck preemption */ 156 + mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); 157 + 158 + /* Set the preemption state to triggered */ 159 + set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED); 160 + 161 + /* Make sure everything is written before hitting the button */ 162 + wmb(); 163 + 164 + /* And actually start the preemption 
*/ 165 + gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1); 166 + } 167 + 168 + void a5xx_preempt_irq(struct msm_gpu *gpu) 169 + { 170 + uint32_t status; 171 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 172 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 173 + struct drm_device *dev = gpu->dev; 174 + struct msm_drm_private *priv = dev->dev_private; 175 + 176 + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING)) 177 + return; 178 + 179 + /* Delete the preemption watchdog timer */ 180 + del_timer(&a5xx_gpu->preempt_timer); 181 + 182 + /* 183 + * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before 184 + * firing the interrupt, but there is a non zero chance of a hardware 185 + * condition or a software race that could set it again before we have a 186 + * chance to finish. If that happens, log and go for recovery 187 + */ 188 + status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL); 189 + if (unlikely(status)) { 190 + set_preempt_state(a5xx_gpu, PREEMPT_FAULTED); 191 + dev_err(dev->dev, "%s: Preemption failed to complete\n", 192 + gpu->name); 193 + queue_work(priv->wq, &gpu->recover_work); 194 + return; 195 + } 196 + 197 + a5xx_gpu->cur_ring = a5xx_gpu->next_ring; 198 + a5xx_gpu->next_ring = NULL; 199 + 200 + update_wptr(gpu, a5xx_gpu->cur_ring); 201 + 202 + set_preempt_state(a5xx_gpu, PREEMPT_NONE); 203 + } 204 + 205 + void a5xx_preempt_hw_init(struct msm_gpu *gpu) 206 + { 207 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 208 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 209 + int i; 210 + 211 + for (i = 0; i < gpu->nr_rings; i++) { 212 + a5xx_gpu->preempt[i]->wptr = 0; 213 + a5xx_gpu->preempt[i]->rptr = 0; 214 + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; 215 + } 216 + 217 + /* Write a 0 to signal that we aren't switching pagetables */ 218 + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 219 + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); 220 + 221 + /* Reset the preemption state */ 
222 + set_preempt_state(a5xx_gpu, PREEMPT_NONE); 223 + 224 + /* Always come up on rb 0 */ 225 + a5xx_gpu->cur_ring = gpu->rb[0]; 226 + } 227 + 228 + static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, 229 + struct msm_ringbuffer *ring) 230 + { 231 + struct adreno_gpu *adreno_gpu = &a5xx_gpu->base; 232 + struct msm_gpu *gpu = &adreno_gpu->base; 233 + struct a5xx_preempt_record *ptr; 234 + struct drm_gem_object *bo = NULL; 235 + u64 iova = 0; 236 + 237 + ptr = msm_gem_kernel_new(gpu->dev, 238 + A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE, 239 + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); 240 + 241 + if (IS_ERR(ptr)) 242 + return PTR_ERR(ptr); 243 + 244 + a5xx_gpu->preempt_bo[ring->id] = bo; 245 + a5xx_gpu->preempt_iova[ring->id] = iova; 246 + a5xx_gpu->preempt[ring->id] = ptr; 247 + 248 + /* Set up the defaults on the preemption record */ 249 + 250 + ptr->magic = A5XX_PREEMPT_RECORD_MAGIC; 251 + ptr->info = 0; 252 + ptr->data = 0; 253 + ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT; 254 + ptr->rptr_addr = rbmemptr(ring, rptr); 255 + ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE; 256 + 257 + return 0; 258 + } 259 + 260 + void a5xx_preempt_fini(struct msm_gpu *gpu) 261 + { 262 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 263 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 264 + int i; 265 + 266 + for (i = 0; i < gpu->nr_rings; i++) { 267 + if (!a5xx_gpu->preempt_bo[i]) 268 + continue; 269 + 270 + msm_gem_put_vaddr(a5xx_gpu->preempt_bo[i]); 271 + 272 + if (a5xx_gpu->preempt_iova[i]) 273 + msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace); 274 + 275 + drm_gem_object_unreference(a5xx_gpu->preempt_bo[i]); 276 + a5xx_gpu->preempt_bo[i] = NULL; 277 + } 278 + } 279 + 280 + void a5xx_preempt_init(struct msm_gpu *gpu) 281 + { 282 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 283 + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 284 + int i; 285 + 286 + /* No preemption if we only have one ring */ 287 + if (gpu->nr_rings <= 1) 288 + return; 
289 + 290 + for (i = 0; i < gpu->nr_rings; i++) { 291 + if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) { 292 + /* 293 + * On any failure our adventure is over. Clean up and 294 + * set nr_rings to 1 to force preemption off 295 + */ 296 + a5xx_preempt_fini(gpu); 297 + gpu->nr_rings = 1; 298 + 299 + return; 300 + } 301 + } 302 + 303 + setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, 304 + (unsigned long) a5xx_gpu); 305 + }
+5 -9
drivers/gpu/drm/msm/adreno/adreno_gpu.c
··· 217 217 return 0; 218 218 } 219 219 220 - static uint32_t get_wptr(struct msm_ringbuffer *ring) 221 - { 222 - return ring->cur - ring->start; 223 - } 224 - 225 220 /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ 226 221 static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, 227 222 struct msm_ringbuffer *ring) ··· 271 276 case MSM_SUBMIT_CMD_BUF: 272 277 OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? 273 278 CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); 274 - OUT_RING(ring, submit->cmd[i].iova); 279 + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 275 280 OUT_RING(ring, submit->cmd[i].size); 276 281 OUT_PKT2(ring); 277 282 break; ··· 338 343 * to account for the possibility that the last command fit exactly into 339 344 * the ringbuffer and rb->next hasn't wrapped to zero yet 340 345 */ 341 - wptr = (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); 346 + wptr = get_wptr(ring); 342 347 343 348 /* ensure writes to ringbuffer have hit system memory: */ 344 349 mb(); ··· 356 361 return true; 357 362 358 363 /* TODO maybe we need to reset GPU here to recover from hang? */ 359 - DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name, 360 - ring->id); 364 + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", 365 + gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); 366 + 361 367 return false; 362 368 } 363 369
+6 -1
drivers/gpu/drm/msm/adreno/adreno_gpu.h
··· 2 2 * Copyright (C) 2013 Red Hat 3 3 * Author: Rob Clark <robdclark@gmail.com> 4 4 * 5 - * Copyright (c) 2014 The Linux Foundation. All rights reserved. 5 + * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved. 6 6 * 7 7 * This program is free software; you can redistribute it and/or modify it 8 8 * under the terms of the GNU General Public License version 2 as published by ··· 330 330 { 331 331 adreno_gpu_write(gpu, lo, lower_32_bits(data)); 332 332 adreno_gpu_write(gpu, hi, upper_32_bits(data)); 333 + } 334 + 335 + static inline uint32_t get_wptr(struct msm_ringbuffer *ring) 336 + { 337 + return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); 333 338 } 334 339 335 340 /*
+1 -1
drivers/gpu/drm/msm/msm_drv.h
··· 74 74 spinlock_t lock; 75 75 }; 76 76 77 - #define MSM_GPU_MAX_RINGS 1 77 + #define MSM_GPU_MAX_RINGS 4 78 78 79 79 struct msm_drm_private { 80 80
+2 -3
drivers/gpu/drm/msm/msm_gpu.c
··· 295 295 * Replay all remaining submits starting with highest priority 296 296 * ring 297 297 */ 298 - 299 - for (i = gpu->nr_rings - 1; i >= 0; i--) { 298 + for (i = 0; i < gpu->nr_rings; i++) { 300 299 struct msm_ringbuffer *ring = gpu->rb[i]; 301 300 302 301 list_for_each_entry(submit, &ring->submits, node) ··· 475 476 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 476 477 477 478 /* Retire the commits starting with highest priority */ 478 - for (i = gpu->nr_rings - 1; i >= 0; i--) { 479 + for (i = 0; i < gpu->nr_rings; i++) { 479 480 struct msm_ringbuffer *ring = gpu->rb[i]; 480 481 481 482 list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
+1
drivers/gpu/drm/msm/msm_ringbuffer.c
··· 53 53 ring->memptrs_iova = memptrs_iova; 54 54 55 55 INIT_LIST_HEAD(&ring->submits); 56 + spin_lock_init(&ring->lock); 56 57 57 58 snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); 58 59
+1
drivers/gpu/drm/msm/msm_ringbuffer.h
··· 40 40 struct msm_rbmemptrs *memptrs; 41 41 uint64_t memptrs_iova; 42 42 struct msm_fence_context *fctx; 43 + spinlock_t lock; 43 44 }; 44 45 45 46 struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,