Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vc4: Add support for drawing 3D frames.

The user submission is basically a pointer to a command list and a
pointer to uniforms. We copy those in to the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.

v2: Drop support for NV shader recs (not necessary for GL), simplify
vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
types.

Signed-off-by: Eric Anholt <eric@anholt.net>

+3243 -1
+7
drivers/gpu/drm/vc4/Makefile
··· 8 8 vc4_crtc.o \ 9 9 vc4_drv.o \ 10 10 vc4_kms.o \ 11 + vc4_gem.o \ 11 12 vc4_hdmi.o \ 12 13 vc4_hvs.o \ 14 + vc4_irq.o \ 13 15 vc4_plane.o \ 16 + vc4_render_cl.o \ 17 + vc4_trace_points.o \ 14 18 vc4_v3d.o \ 19 + vc4_validate.o \ 15 20 vc4_validate_shaders.o 16 21 17 22 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o 18 23 19 24 obj-$(CONFIG_DRM_VC4) += vc4.o 25 + 26 + CFLAGS_vc4_trace_points.o := -I$(src)
+14 -1
drivers/gpu/drm/vc4/vc4_drv.c
··· 74 74 }; 75 75 76 76 static const struct drm_ioctl_desc vc4_drm_ioctls[] = { 77 + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0), 78 + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0), 79 + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0), 77 80 DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), 78 81 DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), 79 82 DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), ··· 86 83 .driver_features = (DRIVER_MODESET | 87 84 DRIVER_ATOMIC | 88 85 DRIVER_GEM | 86 + DRIVER_HAVE_IRQ | 89 87 DRIVER_PRIME), 90 88 .lastclose = vc4_lastclose, 91 89 .preclose = vc4_drm_preclose, 90 + 91 + .irq_handler = vc4_irq, 92 + .irq_preinstall = vc4_irq_preinstall, 93 + .irq_postinstall = vc4_irq_postinstall, 94 + .irq_uninstall = vc4_irq_uninstall, 92 95 93 96 .enable_vblank = vc4_enable_vblank, 94 97 .disable_vblank = vc4_disable_vblank, ··· 190 181 if (ret) 191 182 goto unref; 192 183 184 + vc4_gem_init(drm); 185 + 193 186 ret = component_bind_all(dev, drm); 194 187 if (ret) 195 - goto unref; 188 + goto gem_destroy; 196 189 197 190 ret = drm_dev_register(drm, 0); 198 191 if (ret < 0) ··· 218 207 drm_dev_unregister(drm); 219 208 unbind_all: 220 209 component_unbind_all(dev, drm); 210 + gem_destroy: 211 + vc4_gem_destroy(drm); 221 212 unref: 222 213 drm_dev_unref(drm); 223 214 vc4_bo_cache_destroy(drm);
+182
drivers/gpu/drm/vc4/vc4_drv.h
··· 49 49 50 50 /* Protects bo_cache and the BO stats. */ 51 51 struct mutex bo_lock; 52 + 53 + /* Sequence number for the last job queued in job_list. 54 + * Starts at 0 (no jobs emitted). 55 + */ 56 + uint64_t emit_seqno; 57 + 58 + /* Sequence number for the last completed job on the GPU. 59 + * Starts at 0 (no jobs completed). 60 + */ 61 + uint64_t finished_seqno; 62 + 63 + /* List of all struct vc4_exec_info for jobs to be executed. 64 + * The first job in the list is the one currently programmed 65 + * into ct0ca/ct1ca for execution. 66 + */ 67 + struct list_head job_list; 68 + /* List of the finished vc4_exec_infos waiting to be freed by 69 + * job_done_work. 70 + */ 71 + struct list_head job_done_list; 72 + /* Spinlock used to synchronize the job_list and seqno 73 + * accesses between the IRQ handler and GEM ioctls. 74 + */ 75 + spinlock_t job_lock; 76 + wait_queue_head_t job_wait_queue; 77 + struct work_struct job_done_work; 78 + 79 + /* The binner overflow memory that's currently set up in 80 + * BPOA/BPOS registers. When overflow occurs and a new one is 81 + * allocated, the previous one will be moved to 82 + * vc4->current_exec's free list. 83 + */ 84 + struct vc4_bo *overflow_mem; 85 + struct work_struct overflow_mem_work; 86 + 87 + struct { 88 + uint32_t last_ct0ca, last_ct1ca; 89 + struct timer_list timer; 90 + struct work_struct reset_work; 91 + } hangcheck; 92 + 93 + struct semaphore async_modeset; 52 94 }; 53 95 54 96 static inline struct vc4_dev * ··· 101 59 102 60 struct vc4_bo { 103 61 struct drm_gem_cma_object base; 62 + 63 + /* seqno of the last job to render to this BO. 
*/ 64 + uint64_t seqno; 104 65 105 66 /* List entry for the BO's position in either 106 67 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list ··· 174 129 #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) 175 130 #define HVS_READ(offset) readl(vc4->hvs->regs + offset) 176 131 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset) 132 + 133 + struct vc4_exec_info { 134 + /* Sequence number for this bin/render job. */ 135 + uint64_t seqno; 136 + 137 + /* Kernel-space copy of the ioctl arguments */ 138 + struct drm_vc4_submit_cl *args; 139 + 140 + /* This is the array of BOs that were looked up at the start of exec. 141 + * Command validation will use indices into this array. 142 + */ 143 + struct drm_gem_cma_object **bo; 144 + uint32_t bo_count; 145 + 146 + /* Pointers for our position in vc4->job_list */ 147 + struct list_head head; 148 + 149 + /* List of other BOs used in the job that need to be released 150 + * once the job is complete. 151 + */ 152 + struct list_head unref_list; 153 + 154 + /* Current unvalidated indices into @bo loaded by the non-hardware 155 + * VC4_PACKET_GEM_HANDLES. 156 + */ 157 + uint32_t bo_index[2]; 158 + 159 + /* This is the BO where we store the validated command lists, shader 160 + * records, and uniforms. 161 + */ 162 + struct drm_gem_cma_object *exec_bo; 163 + 164 + /** 165 + * This tracks the per-shader-record state (packet 64) that 166 + * determines the length of the shader record and the offset 167 + * it's expected to be found at. It gets read in from the 168 + * command lists. 169 + */ 170 + struct vc4_shader_state { 171 + uint32_t addr; 172 + /* Maximum vertex index referenced by any primitive using this 173 + * shader state. 174 + */ 175 + uint32_t max_index; 176 + } *shader_state; 177 + 178 + /** How many shader states the user declared they were using. */ 179 + uint32_t shader_state_size; 180 + /** How many shader state records the validator has seen. 
*/ 181 + uint32_t shader_state_count; 182 + 183 + bool found_tile_binning_mode_config_packet; 184 + bool found_start_tile_binning_packet; 185 + bool found_increment_semaphore_packet; 186 + bool found_flush; 187 + uint8_t bin_tiles_x, bin_tiles_y; 188 + struct drm_gem_cma_object *tile_bo; 189 + uint32_t tile_alloc_offset; 190 + 191 + /** 192 + * Computed addresses pointing into exec_bo where we start the 193 + * bin thread (ct0) and render thread (ct1). 194 + */ 195 + uint32_t ct0ca, ct0ea; 196 + uint32_t ct1ca, ct1ea; 197 + 198 + /* Pointer to the unvalidated bin CL (if present). */ 199 + void *bin_u; 200 + 201 + /* Pointers to the shader recs. These paddr gets incremented as CL 202 + * packets are relocated in validate_gl_shader_state, and the vaddrs 203 + * (u and v) get incremented and size decremented as the shader recs 204 + * themselves are validated. 205 + */ 206 + void *shader_rec_u; 207 + void *shader_rec_v; 208 + uint32_t shader_rec_p; 209 + uint32_t shader_rec_size; 210 + 211 + /* Pointers to the uniform data. These pointers are incremented, and 212 + * size decremented, as each batch of uniforms is uploaded. 
213 + */ 214 + void *uniforms_u; 215 + void *uniforms_v; 216 + uint32_t uniforms_p; 217 + uint32_t uniforms_size; 218 + }; 219 + 220 + static inline struct vc4_exec_info * 221 + vc4_first_job(struct vc4_dev *vc4) 222 + { 223 + if (list_empty(&vc4->job_list)) 224 + return NULL; 225 + return list_first_entry(&vc4->job_list, struct vc4_exec_info, head); 226 + } 177 227 178 228 /** 179 229 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture ··· 371 231 /* vc4_drv.c */ 372 232 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); 373 233 234 + /* vc4_gem.c */ 235 + void vc4_gem_init(struct drm_device *dev); 236 + void vc4_gem_destroy(struct drm_device *dev); 237 + int vc4_submit_cl_ioctl(struct drm_device *dev, void *data, 238 + struct drm_file *file_priv); 239 + int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, 240 + struct drm_file *file_priv); 241 + int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 242 + struct drm_file *file_priv); 243 + void vc4_submit_next_job(struct drm_device *dev); 244 + int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, 245 + uint64_t timeout_ns, bool interruptible); 246 + void vc4_job_handle_completed(struct vc4_dev *vc4); 247 + 374 248 /* vc4_hdmi.c */ 375 249 extern struct platform_driver vc4_hdmi_driver; 376 250 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused); 251 + 252 + /* vc4_irq.c */ 253 + irqreturn_t vc4_irq(int irq, void *arg); 254 + void vc4_irq_preinstall(struct drm_device *dev); 255 + int vc4_irq_postinstall(struct drm_device *dev); 256 + void vc4_irq_uninstall(struct drm_device *dev); 257 + void vc4_irq_reset(struct drm_device *dev); 377 258 378 259 /* vc4_hvs.c */ 379 260 extern struct platform_driver vc4_hvs_driver; ··· 414 253 extern struct platform_driver vc4_v3d_driver; 415 254 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); 416 255 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); 256 + int vc4_v3d_set_power(struct 
vc4_dev *vc4, bool on); 257 + 258 + /* vc4_validate.c */ 259 + int 260 + vc4_validate_bin_cl(struct drm_device *dev, 261 + void *validated, 262 + void *unvalidated, 263 + struct vc4_exec_info *exec); 264 + 265 + int 266 + vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); 267 + 268 + struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec, 269 + uint32_t hindex); 270 + 271 + int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); 272 + 273 + bool vc4_check_tex_size(struct vc4_exec_info *exec, 274 + struct drm_gem_cma_object *fbo, 275 + uint32_t offset, uint8_t tiling_format, 276 + uint32_t width, uint32_t height, uint8_t cpp); 417 277 418 278 /* vc4_validate_shader.c */ 419 279 struct vc4_validated_shader_info *
+642
drivers/gpu/drm/vc4/vc4_gem.c
··· 1 + /* 2 + * Copyright © 2014 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + */ 23 + 24 + #include <linux/module.h> 25 + #include <linux/platform_device.h> 26 + #include <linux/device.h> 27 + #include <linux/io.h> 28 + 29 + #include "uapi/drm/vc4_drm.h" 30 + #include "vc4_drv.h" 31 + #include "vc4_regs.h" 32 + #include "vc4_trace.h" 33 + 34 + static void 35 + vc4_queue_hangcheck(struct drm_device *dev) 36 + { 37 + struct vc4_dev *vc4 = to_vc4_dev(dev); 38 + 39 + mod_timer(&vc4->hangcheck.timer, 40 + round_jiffies_up(jiffies + msecs_to_jiffies(100))); 41 + } 42 + 43 + static void 44 + vc4_reset(struct drm_device *dev) 45 + { 46 + struct vc4_dev *vc4 = to_vc4_dev(dev); 47 + 48 + DRM_INFO("Resetting GPU.\n"); 49 + vc4_v3d_set_power(vc4, false); 50 + vc4_v3d_set_power(vc4, true); 51 + 52 + vc4_irq_reset(dev); 53 + 54 + /* Rearm the hangcheck -- another job might have been waiting 55 + * for our hung one to get kicked off, and vc4_irq_reset() 56 + * would have started it. 57 + */ 58 + vc4_queue_hangcheck(dev); 59 + } 60 + 61 + static void 62 + vc4_reset_work(struct work_struct *work) 63 + { 64 + struct vc4_dev *vc4 = 65 + container_of(work, struct vc4_dev, hangcheck.reset_work); 66 + 67 + vc4_reset(vc4->dev); 68 + } 69 + 70 + static void 71 + vc4_hangcheck_elapsed(unsigned long data) 72 + { 73 + struct drm_device *dev = (struct drm_device *)data; 74 + struct vc4_dev *vc4 = to_vc4_dev(dev); 75 + uint32_t ct0ca, ct1ca; 76 + 77 + /* If idle, we can stop watching for hangs. */ 78 + if (list_empty(&vc4->job_list)) 79 + return; 80 + 81 + ct0ca = V3D_READ(V3D_CTNCA(0)); 82 + ct1ca = V3D_READ(V3D_CTNCA(1)); 83 + 84 + /* If we've made any progress in execution, rearm the timer 85 + * and wait. 86 + */ 87 + if (ct0ca != vc4->hangcheck.last_ct0ca || 88 + ct1ca != vc4->hangcheck.last_ct1ca) { 89 + vc4->hangcheck.last_ct0ca = ct0ca; 90 + vc4->hangcheck.last_ct1ca = ct1ca; 91 + vc4_queue_hangcheck(dev); 92 + return; 93 + } 94 + 95 + /* We've gone too long with no progress, reset. 
This has to 96 + * be done from a work struct, since resetting can sleep and 97 + * this timer hook isn't allowed to. 98 + */ 99 + schedule_work(&vc4->hangcheck.reset_work); 100 + } 101 + 102 + static void 103 + submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end) 104 + { 105 + struct vc4_dev *vc4 = to_vc4_dev(dev); 106 + 107 + /* Set the current and end address of the control list. 108 + * Writing the end register is what starts the job. 109 + */ 110 + V3D_WRITE(V3D_CTNCA(thread), start); 111 + V3D_WRITE(V3D_CTNEA(thread), end); 112 + } 113 + 114 + int 115 + vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, 116 + bool interruptible) 117 + { 118 + struct vc4_dev *vc4 = to_vc4_dev(dev); 119 + int ret = 0; 120 + unsigned long timeout_expire; 121 + DEFINE_WAIT(wait); 122 + 123 + if (vc4->finished_seqno >= seqno) 124 + return 0; 125 + 126 + if (timeout_ns == 0) 127 + return -ETIME; 128 + 129 + timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns); 130 + 131 + trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns); 132 + for (;;) { 133 + prepare_to_wait(&vc4->job_wait_queue, &wait, 134 + interruptible ? 
TASK_INTERRUPTIBLE : 135 + TASK_UNINTERRUPTIBLE); 136 + 137 + if (interruptible && signal_pending(current)) { 138 + ret = -ERESTARTSYS; 139 + break; 140 + } 141 + 142 + if (vc4->finished_seqno >= seqno) 143 + break; 144 + 145 + if (timeout_ns != ~0ull) { 146 + if (time_after_eq(jiffies, timeout_expire)) { 147 + ret = -ETIME; 148 + break; 149 + } 150 + schedule_timeout(timeout_expire - jiffies); 151 + } else { 152 + schedule(); 153 + } 154 + } 155 + 156 + finish_wait(&vc4->job_wait_queue, &wait); 157 + trace_vc4_wait_for_seqno_end(dev, seqno); 158 + 159 + if (ret && ret != -ERESTARTSYS) { 160 + DRM_ERROR("timeout waiting for render thread idle\n"); 161 + return ret; 162 + } 163 + 164 + return 0; 165 + } 166 + 167 + static void 168 + vc4_flush_caches(struct drm_device *dev) 169 + { 170 + struct vc4_dev *vc4 = to_vc4_dev(dev); 171 + 172 + /* Flush the GPU L2 caches. These caches sit on top of system 173 + * L3 (the 128kb or so shared with the CPU), and are 174 + * non-allocating in the L3. 175 + */ 176 + V3D_WRITE(V3D_L2CACTL, 177 + V3D_L2CACTL_L2CCLR); 178 + 179 + V3D_WRITE(V3D_SLCACTL, 180 + VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | 181 + VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | 182 + VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | 183 + VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); 184 + } 185 + 186 + /* Sets the registers for the next job to be actually be executed in 187 + * the hardware. 188 + * 189 + * The job_lock should be held during this. 
190 + */ 191 + void 192 + vc4_submit_next_job(struct drm_device *dev) 193 + { 194 + struct vc4_dev *vc4 = to_vc4_dev(dev); 195 + struct vc4_exec_info *exec = vc4_first_job(vc4); 196 + 197 + if (!exec) 198 + return; 199 + 200 + vc4_flush_caches(dev); 201 + 202 + /* Disable the binner's pre-loaded overflow memory address */ 203 + V3D_WRITE(V3D_BPOA, 0); 204 + V3D_WRITE(V3D_BPOS, 0); 205 + 206 + if (exec->ct0ca != exec->ct0ea) 207 + submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); 208 + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); 209 + } 210 + 211 + static void 212 + vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) 213 + { 214 + struct vc4_bo *bo; 215 + unsigned i; 216 + 217 + for (i = 0; i < exec->bo_count; i++) { 218 + bo = to_vc4_bo(&exec->bo[i]->base); 219 + bo->seqno = seqno; 220 + } 221 + 222 + list_for_each_entry(bo, &exec->unref_list, unref_head) { 223 + bo->seqno = seqno; 224 + } 225 + } 226 + 227 + /* Queues a struct vc4_exec_info for execution. If no job is 228 + * currently executing, then submits it. 229 + * 230 + * Unlike most GPUs, our hardware only handles one command list at a 231 + * time. To queue multiple jobs at once, we'd need to edit the 232 + * previous command list to have a jump to the new one at the end, and 233 + * then bump the end address. That's a change for a later date, 234 + * though. 235 + */ 236 + static void 237 + vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) 238 + { 239 + struct vc4_dev *vc4 = to_vc4_dev(dev); 240 + uint64_t seqno; 241 + unsigned long irqflags; 242 + 243 + spin_lock_irqsave(&vc4->job_lock, irqflags); 244 + 245 + seqno = ++vc4->emit_seqno; 246 + exec->seqno = seqno; 247 + vc4_update_bo_seqnos(exec, seqno); 248 + 249 + list_add_tail(&exec->head, &vc4->job_list); 250 + 251 + /* If no job was executing, kick ours off. Otherwise, it'll 252 + * get started when the previous job's frame done interrupt 253 + * occurs. 
254 + */ 255 + if (vc4_first_job(vc4) == exec) { 256 + vc4_submit_next_job(dev); 257 + vc4_queue_hangcheck(dev); 258 + } 259 + 260 + spin_unlock_irqrestore(&vc4->job_lock, irqflags); 261 + } 262 + 263 + /** 264 + * Looks up a bunch of GEM handles for BOs and stores the array for 265 + * use in the command validator that actually writes relocated 266 + * addresses pointing to them. 267 + */ 268 + static int 269 + vc4_cl_lookup_bos(struct drm_device *dev, 270 + struct drm_file *file_priv, 271 + struct vc4_exec_info *exec) 272 + { 273 + struct drm_vc4_submit_cl *args = exec->args; 274 + uint32_t *handles; 275 + int ret = 0; 276 + int i; 277 + 278 + exec->bo_count = args->bo_handle_count; 279 + 280 + if (!exec->bo_count) { 281 + /* See comment on bo_index for why we have to check 282 + * this. 283 + */ 284 + DRM_ERROR("Rendering requires BOs to validate\n"); 285 + return -EINVAL; 286 + } 287 + 288 + exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *), 289 + GFP_KERNEL); 290 + if (!exec->bo) { 291 + DRM_ERROR("Failed to allocate validated BO pointers\n"); 292 + return -ENOMEM; 293 + } 294 + 295 + handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); 296 + if (!handles) { 297 + DRM_ERROR("Failed to allocate incoming GEM handles\n"); 298 + goto fail; 299 + } 300 + 301 + ret = copy_from_user(handles, 302 + (void __user *)(uintptr_t)args->bo_handles, 303 + exec->bo_count * sizeof(uint32_t)); 304 + if (ret) { 305 + DRM_ERROR("Failed to copy in GEM handles\n"); 306 + goto fail; 307 + } 308 + 309 + spin_lock(&file_priv->table_lock); 310 + for (i = 0; i < exec->bo_count; i++) { 311 + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, 312 + handles[i]); 313 + if (!bo) { 314 + DRM_ERROR("Failed to look up GEM BO %d: %d\n", 315 + i, handles[i]); 316 + ret = -EINVAL; 317 + spin_unlock(&file_priv->table_lock); 318 + goto fail; 319 + } 320 + drm_gem_object_reference(bo); 321 + exec->bo[i] = (struct drm_gem_cma_object *)bo; 322 + } 323 + 
spin_unlock(&file_priv->table_lock); 324 + 325 + fail: 326 + kfree(handles); 327 + return 0; 328 + } 329 + 330 + static int 331 + vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) 332 + { 333 + struct drm_vc4_submit_cl *args = exec->args; 334 + void *temp = NULL; 335 + void *bin; 336 + int ret = 0; 337 + uint32_t bin_offset = 0; 338 + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, 339 + 16); 340 + uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; 341 + uint32_t exec_size = uniforms_offset + args->uniforms_size; 342 + uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * 343 + args->shader_rec_count); 344 + struct vc4_bo *bo; 345 + 346 + if (uniforms_offset < shader_rec_offset || 347 + exec_size < uniforms_offset || 348 + args->shader_rec_count >= (UINT_MAX / 349 + sizeof(struct vc4_shader_state)) || 350 + temp_size < exec_size) { 351 + DRM_ERROR("overflow in exec arguments\n"); 352 + goto fail; 353 + } 354 + 355 + /* Allocate space where we'll store the copied in user command lists 356 + * and shader records. 357 + * 358 + * We don't just copy directly into the BOs because we need to 359 + * read the contents back for validation, and I think the 360 + * bo->vaddr is uncached access. 
361 + */ 362 + temp = kmalloc(temp_size, GFP_KERNEL); 363 + if (!temp) { 364 + DRM_ERROR("Failed to allocate storage for copying " 365 + "in bin/render CLs.\n"); 366 + ret = -ENOMEM; 367 + goto fail; 368 + } 369 + bin = temp + bin_offset; 370 + exec->shader_rec_u = temp + shader_rec_offset; 371 + exec->uniforms_u = temp + uniforms_offset; 372 + exec->shader_state = temp + exec_size; 373 + exec->shader_state_size = args->shader_rec_count; 374 + 375 + ret = copy_from_user(bin, 376 + (void __user *)(uintptr_t)args->bin_cl, 377 + args->bin_cl_size); 378 + if (ret) { 379 + DRM_ERROR("Failed to copy in bin cl\n"); 380 + goto fail; 381 + } 382 + 383 + ret = copy_from_user(exec->shader_rec_u, 384 + (void __user *)(uintptr_t)args->shader_rec, 385 + args->shader_rec_size); 386 + if (ret) { 387 + DRM_ERROR("Failed to copy in shader recs\n"); 388 + goto fail; 389 + } 390 + 391 + ret = copy_from_user(exec->uniforms_u, 392 + (void __user *)(uintptr_t)args->uniforms, 393 + args->uniforms_size); 394 + if (ret) { 395 + DRM_ERROR("Failed to copy in uniforms cl\n"); 396 + goto fail; 397 + } 398 + 399 + bo = vc4_bo_create(dev, exec_size, true); 400 + if (!bo) { 401 + DRM_ERROR("Couldn't allocate BO for binning\n"); 402 + ret = PTR_ERR(exec->exec_bo); 403 + goto fail; 404 + } 405 + exec->exec_bo = &bo->base; 406 + 407 + list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head, 408 + &exec->unref_list); 409 + 410 + exec->ct0ca = exec->exec_bo->paddr + bin_offset; 411 + 412 + exec->bin_u = bin; 413 + 414 + exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; 415 + exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; 416 + exec->shader_rec_size = args->shader_rec_size; 417 + 418 + exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; 419 + exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; 420 + exec->uniforms_size = args->uniforms_size; 421 + 422 + ret = vc4_validate_bin_cl(dev, 423 + exec->exec_bo->vaddr + bin_offset, 424 + bin, 425 + exec); 426 + if 
(ret) 427 + goto fail; 428 + 429 + ret = vc4_validate_shader_recs(dev, exec); 430 + 431 + fail: 432 + kfree(temp); 433 + return ret; 434 + } 435 + 436 + static void 437 + vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) 438 + { 439 + unsigned i; 440 + 441 + /* Need the struct lock for drm_gem_object_unreference(). */ 442 + mutex_lock(&dev->struct_mutex); 443 + if (exec->bo) { 444 + for (i = 0; i < exec->bo_count; i++) 445 + drm_gem_object_unreference(&exec->bo[i]->base); 446 + kfree(exec->bo); 447 + } 448 + 449 + while (!list_empty(&exec->unref_list)) { 450 + struct vc4_bo *bo = list_first_entry(&exec->unref_list, 451 + struct vc4_bo, unref_head); 452 + list_del(&bo->unref_head); 453 + drm_gem_object_unreference(&bo->base.base); 454 + } 455 + mutex_unlock(&dev->struct_mutex); 456 + 457 + kfree(exec); 458 + } 459 + 460 + void 461 + vc4_job_handle_completed(struct vc4_dev *vc4) 462 + { 463 + unsigned long irqflags; 464 + 465 + spin_lock_irqsave(&vc4->job_lock, irqflags); 466 + while (!list_empty(&vc4->job_done_list)) { 467 + struct vc4_exec_info *exec = 468 + list_first_entry(&vc4->job_done_list, 469 + struct vc4_exec_info, head); 470 + list_del(&exec->head); 471 + 472 + spin_unlock_irqrestore(&vc4->job_lock, irqflags); 473 + vc4_complete_exec(vc4->dev, exec); 474 + spin_lock_irqsave(&vc4->job_lock, irqflags); 475 + } 476 + spin_unlock_irqrestore(&vc4->job_lock, irqflags); 477 + } 478 + 479 + /* Scheduled when any job has been completed, this walks the list of 480 + * jobs that had completed and unrefs their BOs and frees their exec 481 + * structs. 
482 + */ 483 + static void 484 + vc4_job_done_work(struct work_struct *work) 485 + { 486 + struct vc4_dev *vc4 = 487 + container_of(work, struct vc4_dev, job_done_work); 488 + 489 + vc4_job_handle_completed(vc4); 490 + } 491 + 492 + static int 493 + vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, 494 + uint64_t seqno, 495 + uint64_t *timeout_ns) 496 + { 497 + unsigned long start = jiffies; 498 + int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true); 499 + 500 + if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { 501 + uint64_t delta = jiffies_to_nsecs(jiffies - start); 502 + 503 + if (*timeout_ns >= delta) 504 + *timeout_ns -= delta; 505 + } 506 + 507 + return ret; 508 + } 509 + 510 + int 511 + vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, 512 + struct drm_file *file_priv) 513 + { 514 + struct drm_vc4_wait_seqno *args = data; 515 + 516 + return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, 517 + &args->timeout_ns); 518 + } 519 + 520 + int 521 + vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 522 + struct drm_file *file_priv) 523 + { 524 + int ret; 525 + struct drm_vc4_wait_bo *args = data; 526 + struct drm_gem_object *gem_obj; 527 + struct vc4_bo *bo; 528 + 529 + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); 530 + if (!gem_obj) { 531 + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); 532 + return -EINVAL; 533 + } 534 + bo = to_vc4_bo(gem_obj); 535 + 536 + ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, 537 + &args->timeout_ns); 538 + 539 + drm_gem_object_unreference_unlocked(gem_obj); 540 + return ret; 541 + } 542 + 543 + /** 544 + * Submits a command list to the VC4. 545 + * 546 + * This is what is called batchbuffer emitting on other hardware. 
547 + */ 548 + int 549 + vc4_submit_cl_ioctl(struct drm_device *dev, void *data, 550 + struct drm_file *file_priv) 551 + { 552 + struct vc4_dev *vc4 = to_vc4_dev(dev); 553 + struct drm_vc4_submit_cl *args = data; 554 + struct vc4_exec_info *exec; 555 + int ret; 556 + 557 + if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { 558 + DRM_ERROR("Unknown flags: 0x%02x\n", args->flags); 559 + return -EINVAL; 560 + } 561 + 562 + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); 563 + if (!exec) { 564 + DRM_ERROR("malloc failure on exec struct\n"); 565 + return -ENOMEM; 566 + } 567 + 568 + exec->args = args; 569 + INIT_LIST_HEAD(&exec->unref_list); 570 + 571 + ret = vc4_cl_lookup_bos(dev, file_priv, exec); 572 + if (ret) 573 + goto fail; 574 + 575 + if (exec->args->bin_cl_size != 0) { 576 + ret = vc4_get_bcl(dev, exec); 577 + if (ret) 578 + goto fail; 579 + } else { 580 + exec->ct0ca = 0; 581 + exec->ct0ea = 0; 582 + } 583 + 584 + ret = vc4_get_rcl(dev, exec); 585 + if (ret) 586 + goto fail; 587 + 588 + /* Clear this out of the struct we'll be putting in the queue, 589 + * since it's part of our stack. 590 + */ 591 + exec->args = NULL; 592 + 593 + vc4_queue_submit(dev, exec); 594 + 595 + /* Return the seqno for our job. 
*/ 596 + args->seqno = vc4->emit_seqno; 597 + 598 + return 0; 599 + 600 + fail: 601 + vc4_complete_exec(vc4->dev, exec); 602 + 603 + return ret; 604 + } 605 + 606 + void 607 + vc4_gem_init(struct drm_device *dev) 608 + { 609 + struct vc4_dev *vc4 = to_vc4_dev(dev); 610 + 611 + INIT_LIST_HEAD(&vc4->job_list); 612 + INIT_LIST_HEAD(&vc4->job_done_list); 613 + spin_lock_init(&vc4->job_lock); 614 + 615 + INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); 616 + setup_timer(&vc4->hangcheck.timer, 617 + vc4_hangcheck_elapsed, 618 + (unsigned long)dev); 619 + 620 + INIT_WORK(&vc4->job_done_work, vc4_job_done_work); 621 + } 622 + 623 + void 624 + vc4_gem_destroy(struct drm_device *dev) 625 + { 626 + struct vc4_dev *vc4 = to_vc4_dev(dev); 627 + 628 + /* Waiting for exec to finish would need to be done before 629 + * unregistering V3D. 630 + */ 631 + WARN_ON(vc4->emit_seqno != vc4->finished_seqno); 632 + 633 + /* V3D should already have disabled its interrupt and cleared 634 + * the overflow allocation registers. Now free the object. 635 + */ 636 + if (vc4->overflow_mem) { 637 + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); 638 + vc4->overflow_mem = NULL; 639 + } 640 + 641 + vc4_bo_cache_destroy(dev); 642 + }
+210
drivers/gpu/drm/vc4/vc4_irq.c
··· 1 + /* 2 + * Copyright © 2014 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + */ 23 + 24 + /** DOC: Interrupt management for the V3D engine. 25 + * 26 + * We have an interrupt status register (V3D_INTCTL) which reports 27 + * interrupts, and where writing 1 bits clears those interrupts. 28 + * There are also a pair of interrupt registers 29 + * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or 30 + * disables that specific interrupt, and 0s written are ignored 31 + * (reading either one returns the set of enabled interrupts). 32 + * 33 + * When we take a render frame interrupt, we need to wake the 34 + * processes waiting for some frame to be done, and get the next frame 35 + * submitted ASAP (so the hardware doesn't sit idle when there's work 36 + * to do). 
37 + * 38 + * When we take the binner out of memory interrupt, we need to 39 + * allocate some new memory and pass it to the binner so that the 40 + * current job can make progress. 41 + */ 42 + 43 + #include "vc4_drv.h" 44 + #include "vc4_regs.h" 45 + 46 + #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ 47 + V3D_INT_FRDONE) 48 + 49 + DECLARE_WAIT_QUEUE_HEAD(render_wait); 50 + 51 + static void 52 + vc4_overflow_mem_work(struct work_struct *work) 53 + { 54 + struct vc4_dev *vc4 = 55 + container_of(work, struct vc4_dev, overflow_mem_work); 56 + struct drm_device *dev = vc4->dev; 57 + struct vc4_bo *bo; 58 + 59 + bo = vc4_bo_create(dev, 256 * 1024, true); 60 + if (!bo) { 61 + DRM_ERROR("Couldn't allocate binner overflow mem\n"); 62 + return; 63 + } 64 + 65 + /* If there's a job executing currently, then our previous 66 + * overflow allocation is getting used in that job and we need 67 + * to queue it to be released when the job is done. But if no 68 + * job is executing at all, then we can free the old overflow 69 + * object directly. 70 + * 71 + * No lock necessary for this pointer since we're the only 72 + * ones that update the pointer, and our workqueue won't 73 + * reenter. 
74 + */ 75 + if (vc4->overflow_mem) { 76 + struct vc4_exec_info *current_exec; 77 + unsigned long irqflags; 78 + 79 + spin_lock_irqsave(&vc4->job_lock, irqflags); 80 + current_exec = vc4_first_job(vc4); 81 + if (current_exec) { 82 + vc4->overflow_mem->seqno = vc4->finished_seqno + 1; 83 + list_add_tail(&vc4->overflow_mem->unref_head, 84 + &current_exec->unref_list); 85 + vc4->overflow_mem = NULL; 86 + } 87 + spin_unlock_irqrestore(&vc4->job_lock, irqflags); 88 + } 89 + 90 + if (vc4->overflow_mem) 91 + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); 92 + vc4->overflow_mem = bo; 93 + 94 + V3D_WRITE(V3D_BPOA, bo->base.paddr); 95 + V3D_WRITE(V3D_BPOS, bo->base.base.size); 96 + V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM); 97 + V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); 98 + } 99 + 100 + static void 101 + vc4_irq_finish_job(struct drm_device *dev) 102 + { 103 + struct vc4_dev *vc4 = to_vc4_dev(dev); 104 + struct vc4_exec_info *exec = vc4_first_job(vc4); 105 + 106 + if (!exec) 107 + return; 108 + 109 + vc4->finished_seqno++; 110 + list_move_tail(&exec->head, &vc4->job_done_list); 111 + vc4_submit_next_job(dev); 112 + 113 + wake_up_all(&vc4->job_wait_queue); 114 + schedule_work(&vc4->job_done_work); 115 + } 116 + 117 + irqreturn_t 118 + vc4_irq(int irq, void *arg) 119 + { 120 + struct drm_device *dev = arg; 121 + struct vc4_dev *vc4 = to_vc4_dev(dev); 122 + uint32_t intctl; 123 + irqreturn_t status = IRQ_NONE; 124 + 125 + barrier(); 126 + intctl = V3D_READ(V3D_INTCTL); 127 + 128 + /* Acknowledge the interrupts we're handling here. The render 129 + * frame done interrupt will be cleared, while OUTOMEM will 130 + * stay high until the underlying cause is cleared. 131 + */ 132 + V3D_WRITE(V3D_INTCTL, intctl); 133 + 134 + if (intctl & V3D_INT_OUTOMEM) { 135 + /* Disable OUTOMEM until the work is done. 
*/ 136 + V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM); 137 + schedule_work(&vc4->overflow_mem_work); 138 + status = IRQ_HANDLED; 139 + } 140 + 141 + if (intctl & V3D_INT_FRDONE) { 142 + spin_lock(&vc4->job_lock); 143 + vc4_irq_finish_job(dev); 144 + spin_unlock(&vc4->job_lock); 145 + status = IRQ_HANDLED; 146 + } 147 + 148 + return status; 149 + } 150 + 151 + void 152 + vc4_irq_preinstall(struct drm_device *dev) 153 + { 154 + struct vc4_dev *vc4 = to_vc4_dev(dev); 155 + 156 + init_waitqueue_head(&vc4->job_wait_queue); 157 + INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work); 158 + 159 + /* Clear any pending interrupts someone might have left around 160 + * for us. 161 + */ 162 + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); 163 + } 164 + 165 + int 166 + vc4_irq_postinstall(struct drm_device *dev) 167 + { 168 + struct vc4_dev *vc4 = to_vc4_dev(dev); 169 + 170 + /* Enable both the render done and out of memory interrupts. */ 171 + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); 172 + 173 + return 0; 174 + } 175 + 176 + void 177 + vc4_irq_uninstall(struct drm_device *dev) 178 + { 179 + struct vc4_dev *vc4 = to_vc4_dev(dev); 180 + 181 + /* Disable sending interrupts for our driver's IRQs. */ 182 + V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS); 183 + 184 + /* Clear any pending interrupts we might have left. */ 185 + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); 186 + 187 + cancel_work_sync(&vc4->overflow_mem_work); 188 + } 189 + 190 + /** Reinitializes interrupt registers when a GPU reset is performed. */ 191 + void vc4_irq_reset(struct drm_device *dev) 192 + { 193 + struct vc4_dev *vc4 = to_vc4_dev(dev); 194 + unsigned long irqflags; 195 + 196 + /* Acknowledge any stale IRQs. */ 197 + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); 198 + 199 + /* 200 + * Turn all our interrupts on. Binner out of memory is the 201 + * only one we expect to trigger at this point, since we've 202 + * just come from poweron and haven't supplied any overflow 203 + * memory yet. 
204 + */ 205 + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); 206 + 207 + spin_lock_irqsave(&vc4->job_lock, irqflags); 208 + vc4_irq_finish_job(dev); 209 + spin_unlock_irqrestore(&vc4->job_lock, irqflags); 210 + }
+399
drivers/gpu/drm/vc4/vc4_packet.h
··· 1 + /* 2 + * Copyright © 2014 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + */ 23 + 24 + #ifndef VC4_PACKET_H 25 + #define VC4_PACKET_H 26 + 27 + #include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */ 28 + 29 + enum vc4_packet { 30 + VC4_PACKET_HALT = 0, 31 + VC4_PACKET_NOP = 1, 32 + 33 + VC4_PACKET_FLUSH = 4, 34 + VC4_PACKET_FLUSH_ALL = 5, 35 + VC4_PACKET_START_TILE_BINNING = 6, 36 + VC4_PACKET_INCREMENT_SEMAPHORE = 7, 37 + VC4_PACKET_WAIT_ON_SEMAPHORE = 8, 38 + 39 + VC4_PACKET_BRANCH = 16, 40 + VC4_PACKET_BRANCH_TO_SUB_LIST = 17, 41 + 42 + VC4_PACKET_STORE_MS_TILE_BUFFER = 24, 43 + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, 44 + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, 45 + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, 46 + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, 47 + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, 48 + 49 + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, 50 + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, 51 + 52 + VC4_PACKET_COMPRESSED_PRIMITIVE = 48, 53 + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, 54 + 55 + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, 56 + 57 + VC4_PACKET_GL_SHADER_STATE = 64, 58 + VC4_PACKET_NV_SHADER_STATE = 65, 59 + VC4_PACKET_VG_SHADER_STATE = 66, 60 + 61 + VC4_PACKET_CONFIGURATION_BITS = 96, 62 + VC4_PACKET_FLAT_SHADE_FLAGS = 97, 63 + VC4_PACKET_POINT_SIZE = 98, 64 + VC4_PACKET_LINE_WIDTH = 99, 65 + VC4_PACKET_RHT_X_BOUNDARY = 100, 66 + VC4_PACKET_DEPTH_OFFSET = 101, 67 + VC4_PACKET_CLIP_WINDOW = 102, 68 + VC4_PACKET_VIEWPORT_OFFSET = 103, 69 + VC4_PACKET_Z_CLIPPING = 104, 70 + VC4_PACKET_CLIPPER_XY_SCALING = 105, 71 + VC4_PACKET_CLIPPER_Z_SCALING = 106, 72 + 73 + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, 74 + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, 75 + VC4_PACKET_CLEAR_COLORS = 114, 76 + VC4_PACKET_TILE_COORDINATES = 115, 77 + 78 + /* Not an actual hardware packet -- this is what we use to put 79 + * references to GEM bos in the command stream, since we need the u32 80 + * in the actual address packet in order to store the offset from the 81 + * start of the BO. 
82 + */ 83 + VC4_PACKET_GEM_HANDLES = 254, 84 + } __attribute__ ((__packed__)); 85 + 86 + #define VC4_PACKET_HALT_SIZE 1 87 + #define VC4_PACKET_NOP_SIZE 1 88 + #define VC4_PACKET_FLUSH_SIZE 1 89 + #define VC4_PACKET_FLUSH_ALL_SIZE 1 90 + #define VC4_PACKET_START_TILE_BINNING_SIZE 1 91 + #define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 92 + #define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 93 + #define VC4_PACKET_BRANCH_SIZE 5 94 + #define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 95 + #define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 96 + #define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 97 + #define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5 98 + #define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5 99 + #define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 100 + #define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 101 + #define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 102 + #define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 103 + #define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1 104 + #define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1 105 + #define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 106 + #define VC4_PACKET_GL_SHADER_STATE_SIZE 5 107 + #define VC4_PACKET_NV_SHADER_STATE_SIZE 5 108 + #define VC4_PACKET_VG_SHADER_STATE_SIZE 5 109 + #define VC4_PACKET_CONFIGURATION_BITS_SIZE 4 110 + #define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 111 + #define VC4_PACKET_POINT_SIZE_SIZE 5 112 + #define VC4_PACKET_LINE_WIDTH_SIZE 5 113 + #define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 114 + #define VC4_PACKET_DEPTH_OFFSET_SIZE 5 115 + #define VC4_PACKET_CLIP_WINDOW_SIZE 9 116 + #define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 117 + #define VC4_PACKET_Z_CLIPPING_SIZE 9 118 + #define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 119 + #define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 120 + #define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 121 + #define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 122 + #define VC4_PACKET_CLEAR_COLORS_SIZE 14 123 + #define VC4_PACKET_TILE_COORDINATES_SIZE 3 124 + #define VC4_PACKET_GEM_HANDLES_SIZE 
9 125 + 126 + /* Number of multisamples supported. */ 127 + #define VC4_MAX_SAMPLES 4 128 + /* Size of a full resolution color or Z tile buffer load/store. */ 129 + #define VC4_TILE_BUFFER_SIZE (64 * 64 * 4) 130 + 131 + /** @{ 132 + * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 133 + * VC4_PACKET_TILE_RENDERING_MODE_CONFIG. 134 + */ 135 + #define VC4_TILING_FORMAT_LINEAR 0 136 + #define VC4_TILING_FORMAT_T 1 137 + #define VC4_TILING_FORMAT_LT 2 138 + /** @} */ 139 + 140 + /** @{ 141 + * 142 + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and 143 + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. 144 + */ 145 + #define VC4_LOADSTORE_FULL_RES_EOF BIT(3) 146 + #define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2) 147 + #define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1) 148 + #define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0) 149 + 150 + /** @{ 151 + * 152 + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and 153 + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. 154 + */ 155 + #define VC4_LOADSTORE_FULL_RES_EOF BIT(3) 156 + #define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2) 157 + #define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1) 158 + #define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0) 159 + 160 + /** @{ 161 + * 162 + * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 163 + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) 164 + */ 165 + 166 + #define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3) 167 + #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2) 168 + #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1) 169 + #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0) 170 + 171 + /** @} */ 172 + 173 + /** @{ 174 + * 175 + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 176 + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL 177 + */ 178 + #define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15) 179 + #define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14) 180 + #define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13) 181 
+ #define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12) 182 + 183 + #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) 184 + #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 185 + #define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 186 + #define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 187 + #define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 188 + /** @} */ 189 + 190 + /** @{ 191 + * 192 + * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 193 + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL 194 + */ 195 + #define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) 196 + #define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 197 + #define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) 198 + #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) 199 + #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) 200 + 201 + /** The values of the field are VC4_TILING_FORMAT_* */ 202 + #define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) 203 + #define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 204 + 205 + #define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) 206 + #define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 207 + #define VC4_LOADSTORE_TILE_BUFFER_NONE 0 208 + #define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 209 + #define VC4_LOADSTORE_TILE_BUFFER_ZS 2 210 + #define VC4_LOADSTORE_TILE_BUFFER_Z 3 211 + #define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 212 + #define VC4_LOADSTORE_TILE_BUFFER_FULL 5 213 + /** @} */ 214 + 215 + #define VC4_INDEX_BUFFER_U8 (0 << 4) 216 + #define VC4_INDEX_BUFFER_U16 (1 << 4) 217 + 218 + /* This flag is only present in NV shader state. */ 219 + #define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3) 220 + #define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2) 221 + #define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1) 222 + #define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0) 223 + 224 + /** @{ byte 2 of config bits. */ 225 + #define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1) 226 + #define VC4_CONFIG_BITS_EARLY_Z BIT(0) 227 + /** @} */ 228 + 229 + /** @{ byte 1 of config bits. 
*/ 230 + #define VC4_CONFIG_BITS_Z_UPDATE BIT(7) 231 + /** same values in this 3-bit field as PIPE_FUNC_* */ 232 + #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 233 + #define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3) 234 + 235 + #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) 236 + #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) 237 + #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) 238 + #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) 239 + 240 + #define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0) 241 + /** @} */ 242 + 243 + /** @{ byte 0 of config bits. */ 244 + #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6) 245 + #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) 246 + #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) 247 + 248 + #define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4) 249 + #define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3) 250 + #define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2) 251 + #define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1) 252 + #define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0) 253 + /** @} */ 254 + 255 + /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ 256 + #define VC4_BIN_CONFIG_DB_NON_MS BIT(7) 257 + 258 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) 259 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 260 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 261 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 262 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 263 + #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 264 + 265 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) 266 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 267 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 268 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 269 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 270 + #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 271 + 272 + #define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2) 273 + #define 
VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1) 274 + #define VC4_BIN_CONFIG_MS_MODE_4X BIT(0) 275 + /** @} */ 276 + 277 + /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ 278 + #define VC4_RENDER_CONFIG_DB_NON_MS BIT(12) 279 + #define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11) 280 + #define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10) 281 + #define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9) 282 + #define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8) 283 + 284 + /** The values of the field are VC4_TILING_FORMAT_* */ 285 + #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) 286 + #define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 287 + 288 + #define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) 289 + #define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) 290 + #define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) 291 + 292 + #define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) 293 + #define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 294 + #define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 295 + #define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 296 + #define VC4_RENDER_CONFIG_FORMAT_BGR565 2 297 + 298 + #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1) 299 + #define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0) 300 + 301 + #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) 302 + #define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) 303 + #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0) 304 + #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0) 305 + #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0) 306 + #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) 307 + 308 + enum vc4_texture_data_type { 309 + VC4_TEXTURE_TYPE_RGBA8888 = 0, 310 + VC4_TEXTURE_TYPE_RGBX8888 = 1, 311 + VC4_TEXTURE_TYPE_RGBA4444 = 2, 312 + VC4_TEXTURE_TYPE_RGBA5551 = 3, 313 + VC4_TEXTURE_TYPE_RGB565 = 4, 314 + VC4_TEXTURE_TYPE_LUMINANCE = 5, 315 + VC4_TEXTURE_TYPE_ALPHA = 6, 316 + VC4_TEXTURE_TYPE_LUMALPHA = 7, 317 + VC4_TEXTURE_TYPE_ETC1 = 8, 318 + VC4_TEXTURE_TYPE_S16F = 9, 319 + 
VC4_TEXTURE_TYPE_S8 = 10, 320 + VC4_TEXTURE_TYPE_S16 = 11, 321 + VC4_TEXTURE_TYPE_BW1 = 12, 322 + VC4_TEXTURE_TYPE_A4 = 13, 323 + VC4_TEXTURE_TYPE_A1 = 14, 324 + VC4_TEXTURE_TYPE_RGBA64 = 15, 325 + VC4_TEXTURE_TYPE_RGBA32R = 16, 326 + VC4_TEXTURE_TYPE_YUV422R = 17, 327 + }; 328 + 329 + #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) 330 + #define VC4_TEX_P0_OFFSET_SHIFT 12 331 + #define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) 332 + #define VC4_TEX_P0_CSWIZ_SHIFT 10 333 + #define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) 334 + #define VC4_TEX_P0_CMMODE_SHIFT 9 335 + #define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) 336 + #define VC4_TEX_P0_FLIPY_SHIFT 8 337 + #define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) 338 + #define VC4_TEX_P0_TYPE_SHIFT 4 339 + #define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) 340 + #define VC4_TEX_P0_MIPLVLS_SHIFT 0 341 + 342 + #define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) 343 + #define VC4_TEX_P1_TYPE4_SHIFT 31 344 + #define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) 345 + #define VC4_TEX_P1_HEIGHT_SHIFT 20 346 + #define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) 347 + #define VC4_TEX_P1_ETCFLIP_SHIFT 19 348 + #define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) 349 + #define VC4_TEX_P1_WIDTH_SHIFT 8 350 + 351 + #define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) 352 + #define VC4_TEX_P1_MAGFILT_SHIFT 7 353 + # define VC4_TEX_P1_MAGFILT_LINEAR 0 354 + # define VC4_TEX_P1_MAGFILT_NEAREST 1 355 + 356 + #define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) 357 + #define VC4_TEX_P1_MINFILT_SHIFT 4 358 + # define VC4_TEX_P1_MINFILT_LINEAR 0 359 + # define VC4_TEX_P1_MINFILT_NEAREST 1 360 + # define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 361 + # define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 362 + # define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 363 + # define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 364 + 365 + #define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) 366 + #define VC4_TEX_P1_WRAP_T_SHIFT 2 367 + #define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) 368 + #define VC4_TEX_P1_WRAP_S_SHIFT 0 369 + # define 
VC4_TEX_P1_WRAP_REPEAT 0 370 + # define VC4_TEX_P1_WRAP_CLAMP 1 371 + # define VC4_TEX_P1_WRAP_MIRROR 2 372 + # define VC4_TEX_P1_WRAP_BORDER 3 373 + 374 + #define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) 375 + #define VC4_TEX_P2_PTYPE_SHIFT 30 376 + # define VC4_TEX_P2_PTYPE_IGNORED 0 377 + # define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 378 + # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 379 + # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 380 + 381 + /* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ 382 + #define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) 383 + #define VC4_TEX_P2_CMST_SHIFT 12 384 + #define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) 385 + #define VC4_TEX_P2_BSLOD_SHIFT 0 386 + 387 + /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ 388 + #define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) 389 + #define VC4_TEX_P2_CHEIGHT_SHIFT 12 390 + #define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) 391 + #define VC4_TEX_P2_CWIDTH_SHIFT 0 392 + 393 + /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ 394 + #define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) 395 + #define VC4_TEX_P2_CYOFF_SHIFT 12 396 + #define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) 397 + #define VC4_TEX_P2_CXOFF_SHIFT 0 398 + 399 + #endif /* VC4_PACKET_H */
+634
drivers/gpu/drm/vc4/vc4_render_cl.c
··· 1 + /* 2 + * Copyright © 2014-2015 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + */ 23 + 24 + /** 25 + * DOC: Render command list generation 26 + * 27 + * In the VC4 driver, render command list generation is performed by the 28 + * kernel instead of userspace. We do this because validating a 29 + * user-submitted command list is hard to get right and has high CPU overhead, 30 + * while the number of valid configurations for render command lists is 31 + * actually fairly low. 
32 + */ 33 + 34 + #include "uapi/drm/vc4_drm.h" 35 + #include "vc4_drv.h" 36 + #include "vc4_packet.h" 37 + 38 + struct vc4_rcl_setup { 39 + struct drm_gem_cma_object *color_read; 40 + struct drm_gem_cma_object *color_write; 41 + struct drm_gem_cma_object *zs_read; 42 + struct drm_gem_cma_object *zs_write; 43 + struct drm_gem_cma_object *msaa_color_write; 44 + struct drm_gem_cma_object *msaa_zs_write; 45 + 46 + struct drm_gem_cma_object *rcl; 47 + u32 next_offset; 48 + }; 49 + 50 + static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) 51 + { 52 + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; 53 + setup->next_offset += 1; 54 + } 55 + 56 + static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) 57 + { 58 + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; 59 + setup->next_offset += 2; 60 + } 61 + 62 + static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) 63 + { 64 + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; 65 + setup->next_offset += 4; 66 + } 67 + 68 + /* 69 + * Emits a no-op STORE_TILE_BUFFER_GENERAL. 70 + * 71 + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of 72 + * some sort before another load is triggered. 73 + */ 74 + static void vc4_store_before_load(struct vc4_rcl_setup *setup) 75 + { 76 + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); 77 + rcl_u16(setup, 78 + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, 79 + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | 80 + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | 81 + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | 82 + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); 83 + rcl_u32(setup, 0); /* no address, since we're in None mode */ 84 + } 85 + 86 + /* 87 + * Calculates the physical address of the start of a tile in a RCL surface. 88 + * 89 + * Unlike the other load/store packets, 90 + * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile 91 + * coordinates packet, and instead just store to the address given. 
92 + */ 93 + static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec, 94 + struct drm_gem_cma_object *bo, 95 + struct drm_vc4_submit_rcl_surface *surf, 96 + uint8_t x, uint8_t y) 97 + { 98 + return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE * 99 + (DIV_ROUND_UP(exec->args->width, 32) * y + x); 100 + } 101 + 102 + /* 103 + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. 104 + * 105 + * The tile coordinates packet triggers a pending load if there is one, are 106 + * used for clipping during rendering, and determine where loads/stores happen 107 + * relative to their base address. 108 + */ 109 + static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, 110 + uint32_t x, uint32_t y) 111 + { 112 + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); 113 + rcl_u8(setup, x); 114 + rcl_u8(setup, y); 115 + } 116 + 117 + static void emit_tile(struct vc4_exec_info *exec, 118 + struct vc4_rcl_setup *setup, 119 + uint8_t x, uint8_t y, bool first, bool last) 120 + { 121 + struct drm_vc4_submit_cl *args = exec->args; 122 + bool has_bin = args->bin_cl_size != 0; 123 + 124 + /* Note that the load doesn't actually occur until the 125 + * tile coords packet is processed, and only one load 126 + * may be outstanding at a time. 
127 + */ 128 + if (setup->color_read) { 129 + if (args->color_read.flags & 130 + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 131 + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); 132 + rcl_u32(setup, 133 + vc4_full_res_offset(exec, setup->color_read, 134 + &args->color_read, x, y) | 135 + VC4_LOADSTORE_FULL_RES_DISABLE_ZS); 136 + } else { 137 + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); 138 + rcl_u16(setup, args->color_read.bits); 139 + rcl_u32(setup, setup->color_read->paddr + 140 + args->color_read.offset); 141 + } 142 + } 143 + 144 + if (setup->zs_read) { 145 + if (args->zs_read.flags & 146 + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 147 + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); 148 + rcl_u32(setup, 149 + vc4_full_res_offset(exec, setup->zs_read, 150 + &args->zs_read, x, y) | 151 + VC4_LOADSTORE_FULL_RES_DISABLE_COLOR); 152 + } else { 153 + if (setup->color_read) { 154 + /* Exec previous load. */ 155 + vc4_tile_coordinates(setup, x, y); 156 + vc4_store_before_load(setup); 157 + } 158 + 159 + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); 160 + rcl_u16(setup, args->zs_read.bits); 161 + rcl_u32(setup, setup->zs_read->paddr + 162 + args->zs_read.offset); 163 + } 164 + } 165 + 166 + /* Clipping depends on tile coordinates having been 167 + * emitted, so we always need one here. 168 + */ 169 + vc4_tile_coordinates(setup, x, y); 170 + 171 + /* Wait for the binner before jumping to the first 172 + * tile's lists. 
173 + */ 174 + if (first && has_bin) 175 + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); 176 + 177 + if (has_bin) { 178 + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); 179 + rcl_u32(setup, (exec->tile_bo->paddr + 180 + exec->tile_alloc_offset + 181 + (y * exec->bin_tiles_x + x) * 32)); 182 + } 183 + 184 + if (setup->msaa_color_write) { 185 + bool last_tile_write = (!setup->msaa_zs_write && 186 + !setup->zs_write && 187 + !setup->color_write); 188 + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS; 189 + 190 + if (!last_tile_write) 191 + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; 192 + else if (last) 193 + bits |= VC4_LOADSTORE_FULL_RES_EOF; 194 + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); 195 + rcl_u32(setup, 196 + vc4_full_res_offset(exec, setup->msaa_color_write, 197 + &args->msaa_color_write, x, y) | 198 + bits); 199 + } 200 + 201 + if (setup->msaa_zs_write) { 202 + bool last_tile_write = (!setup->zs_write && 203 + !setup->color_write); 204 + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR; 205 + 206 + if (setup->msaa_color_write) 207 + vc4_tile_coordinates(setup, x, y); 208 + if (!last_tile_write) 209 + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; 210 + else if (last) 211 + bits |= VC4_LOADSTORE_FULL_RES_EOF; 212 + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); 213 + rcl_u32(setup, 214 + vc4_full_res_offset(exec, setup->msaa_zs_write, 215 + &args->msaa_zs_write, x, y) | 216 + bits); 217 + } 218 + 219 + if (setup->zs_write) { 220 + bool last_tile_write = !setup->color_write; 221 + 222 + if (setup->msaa_color_write || setup->msaa_zs_write) 223 + vc4_tile_coordinates(setup, x, y); 224 + 225 + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); 226 + rcl_u16(setup, args->zs_write.bits | 227 + (last_tile_write ? 228 + 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR)); 229 + rcl_u32(setup, 230 + (setup->zs_write->paddr + args->zs_write.offset) | 231 + ((last && last_tile_write) ? 
232 + VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); 233 + } 234 + 235 + if (setup->color_write) { 236 + if (setup->msaa_color_write || setup->msaa_zs_write || 237 + setup->zs_write) { 238 + vc4_tile_coordinates(setup, x, y); 239 + } 240 + 241 + if (last) 242 + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); 243 + else 244 + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); 245 + } 246 + } 247 + 248 + static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, 249 + struct vc4_rcl_setup *setup) 250 + { 251 + struct drm_vc4_submit_cl *args = exec->args; 252 + bool has_bin = args->bin_cl_size != 0; 253 + uint8_t min_x_tile = args->min_x_tile; 254 + uint8_t min_y_tile = args->min_y_tile; 255 + uint8_t max_x_tile = args->max_x_tile; 256 + uint8_t max_y_tile = args->max_y_tile; 257 + uint8_t xtiles = max_x_tile - min_x_tile + 1; 258 + uint8_t ytiles = max_y_tile - min_y_tile + 1; 259 + uint8_t x, y; 260 + uint32_t size, loop_body_size; 261 + 262 + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; 263 + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; 264 + 265 + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { 266 + size += VC4_PACKET_CLEAR_COLORS_SIZE + 267 + VC4_PACKET_TILE_COORDINATES_SIZE + 268 + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; 269 + } 270 + 271 + if (setup->color_read) { 272 + if (args->color_read.flags & 273 + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 274 + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; 275 + } else { 276 + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; 277 + } 278 + } 279 + if (setup->zs_read) { 280 + if (args->zs_read.flags & 281 + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 282 + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; 283 + } else { 284 + if (setup->color_read && 285 + !(args->color_read.flags & 286 + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) { 287 + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; 288 + loop_body_size += 
VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; 289 + } 290 + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; 291 + } 292 + } 293 + 294 + if (has_bin) { 295 + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; 296 + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; 297 + } 298 + 299 + if (setup->msaa_color_write) 300 + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; 301 + if (setup->msaa_zs_write) 302 + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; 303 + 304 + if (setup->zs_write) 305 + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; 306 + if (setup->color_write) 307 + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; 308 + 309 + /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */ 310 + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE * 311 + ((setup->msaa_color_write != NULL) + 312 + (setup->msaa_zs_write != NULL) + 313 + (setup->color_write != NULL) + 314 + (setup->zs_write != NULL) - 1); 315 + 316 + size += xtiles * ytiles * loop_body_size; 317 + 318 + setup->rcl = &vc4_bo_create(dev, size, true)->base; 319 + if (!setup->rcl) 320 + return -ENOMEM; 321 + list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, 322 + &exec->unref_list); 323 + 324 + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); 325 + rcl_u32(setup, 326 + (setup->color_write ? (setup->color_write->paddr + 327 + args->color_write.offset) : 328 + 0)); 329 + rcl_u16(setup, args->width); 330 + rcl_u16(setup, args->height); 331 + rcl_u16(setup, args->color_write.bits); 332 + 333 + /* The tile buffer gets cleared when the previous tile is stored. If 334 + * the clear values changed between frames, then the tile buffer has 335 + * stale clear values in it, so we have to do a store in None mode (no 336 + * writes) so that we trigger the tile buffer clear. 
337 + */ 338 + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { 339 + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); 340 + rcl_u32(setup, args->clear_color[0]); 341 + rcl_u32(setup, args->clear_color[1]); 342 + rcl_u32(setup, args->clear_z); 343 + rcl_u8(setup, args->clear_s); 344 + 345 + vc4_tile_coordinates(setup, 0, 0); 346 + 347 + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); 348 + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); 349 + rcl_u32(setup, 0); /* no address, since we're in None mode */ 350 + } 351 + 352 + for (y = min_y_tile; y <= max_y_tile; y++) { 353 + for (x = min_x_tile; x <= max_x_tile; x++) { 354 + bool first = (x == min_x_tile && y == min_y_tile); 355 + bool last = (x == max_x_tile && y == max_y_tile); 356 + 357 + emit_tile(exec, setup, x, y, first, last); 358 + } 359 + } 360 + 361 + BUG_ON(setup->next_offset != size); 362 + exec->ct1ca = setup->rcl->paddr; 363 + exec->ct1ea = setup->rcl->paddr + setup->next_offset; 364 + 365 + return 0; 366 + } 367 + 368 + static int vc4_full_res_bounds_check(struct vc4_exec_info *exec, 369 + struct drm_gem_cma_object *obj, 370 + struct drm_vc4_submit_rcl_surface *surf) 371 + { 372 + struct drm_vc4_submit_cl *args = exec->args; 373 + u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32); 374 + 375 + if (surf->offset > obj->base.size) { 376 + DRM_ERROR("surface offset %d > BO size %zd\n", 377 + surf->offset, obj->base.size); 378 + return -EINVAL; 379 + } 380 + 381 + if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE < 382 + render_tiles_stride * args->max_y_tile + args->max_x_tile) { 383 + DRM_ERROR("MSAA tile %d, %d out of bounds " 384 + "(bo size %zd, offset %d).\n", 385 + args->max_x_tile, args->max_y_tile, 386 + obj->base.size, 387 + surf->offset); 388 + return -EINVAL; 389 + } 390 + 391 + return 0; 392 + } 393 + 394 + static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, 395 + struct drm_gem_cma_object **obj, 396 + struct drm_vc4_submit_rcl_surface *surf) 397 + { 398 + if 
(surf->flags != 0 || surf->bits != 0) { 399 + DRM_ERROR("MSAA surface had nonzero flags/bits\n"); 400 + return -EINVAL; 401 + } 402 + 403 + if (surf->hindex == ~0) 404 + return 0; 405 + 406 + *obj = vc4_use_bo(exec, surf->hindex); 407 + if (!*obj) 408 + return -EINVAL; 409 + 410 + if (surf->offset & 0xf) { 411 + DRM_ERROR("MSAA write must be 16b aligned.\n"); 412 + return -EINVAL; 413 + } 414 + 415 + return vc4_full_res_bounds_check(exec, *obj, surf); 416 + } 417 + 418 + static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, 419 + struct drm_gem_cma_object **obj, 420 + struct drm_vc4_submit_rcl_surface *surf) 421 + { 422 + uint8_t tiling = VC4_GET_FIELD(surf->bits, 423 + VC4_LOADSTORE_TILE_BUFFER_TILING); 424 + uint8_t buffer = VC4_GET_FIELD(surf->bits, 425 + VC4_LOADSTORE_TILE_BUFFER_BUFFER); 426 + uint8_t format = VC4_GET_FIELD(surf->bits, 427 + VC4_LOADSTORE_TILE_BUFFER_FORMAT); 428 + int cpp; 429 + int ret; 430 + 431 + if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 432 + DRM_ERROR("Extra flags set\n"); 433 + return -EINVAL; 434 + } 435 + 436 + if (surf->hindex == ~0) 437 + return 0; 438 + 439 + *obj = vc4_use_bo(exec, surf->hindex); 440 + if (!*obj) 441 + return -EINVAL; 442 + 443 + if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 444 + if (surf == &exec->args->zs_write) { 445 + DRM_ERROR("general zs write may not be a full-res.\n"); 446 + return -EINVAL; 447 + } 448 + 449 + if (surf->bits != 0) { 450 + DRM_ERROR("load/store general bits set with " 451 + "full res load/store.\n"); 452 + return -EINVAL; 453 + } 454 + 455 + ret = vc4_full_res_bounds_check(exec, *obj, surf); 456 + if (!ret) 457 + return ret; 458 + 459 + return 0; 460 + } 461 + 462 + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | 463 + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | 464 + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { 465 + DRM_ERROR("Unknown bits in load/store: 0x%04x\n", 466 + surf->bits); 467 + return -EINVAL; 468 + } 469 + 470 + if (tiling > 
VC4_TILING_FORMAT_LT) { 471 + DRM_ERROR("Bad tiling format\n"); 472 + return -EINVAL; 473 + } 474 + 475 + if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) { 476 + if (format != 0) { 477 + DRM_ERROR("No color format should be set for ZS\n"); 478 + return -EINVAL; 479 + } 480 + cpp = 4; 481 + } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { 482 + switch (format) { 483 + case VC4_LOADSTORE_TILE_BUFFER_BGR565: 484 + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: 485 + cpp = 2; 486 + break; 487 + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: 488 + cpp = 4; 489 + break; 490 + default: 491 + DRM_ERROR("Bad tile buffer format\n"); 492 + return -EINVAL; 493 + } 494 + } else { 495 + DRM_ERROR("Bad load/store buffer %d.\n", buffer); 496 + return -EINVAL; 497 + } 498 + 499 + if (surf->offset & 0xf) { 500 + DRM_ERROR("load/store buffer must be 16b aligned.\n"); 501 + return -EINVAL; 502 + } 503 + 504 + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, 505 + exec->args->width, exec->args->height, cpp)) { 506 + return -EINVAL; 507 + } 508 + 509 + return 0; 510 + } 511 + 512 + static int 513 + vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, 514 + struct vc4_rcl_setup *setup, 515 + struct drm_gem_cma_object **obj, 516 + struct drm_vc4_submit_rcl_surface *surf) 517 + { 518 + uint8_t tiling = VC4_GET_FIELD(surf->bits, 519 + VC4_RENDER_CONFIG_MEMORY_FORMAT); 520 + uint8_t format = VC4_GET_FIELD(surf->bits, 521 + VC4_RENDER_CONFIG_FORMAT); 522 + int cpp; 523 + 524 + if (surf->flags != 0) { 525 + DRM_ERROR("No flags supported on render config.\n"); 526 + return -EINVAL; 527 + } 528 + 529 + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | 530 + VC4_RENDER_CONFIG_FORMAT_MASK | 531 + VC4_RENDER_CONFIG_MS_MODE_4X | 532 + VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) { 533 + DRM_ERROR("Unknown bits in render config: 0x%04x\n", 534 + surf->bits); 535 + return -EINVAL; 536 + } 537 + 538 + if (surf->hindex == ~0) 539 + return 0; 540 + 541 + *obj = vc4_use_bo(exec, 
surf->hindex); 542 + if (!*obj) 543 + return -EINVAL; 544 + 545 + if (tiling > VC4_TILING_FORMAT_LT) { 546 + DRM_ERROR("Bad tiling format\n"); 547 + return -EINVAL; 548 + } 549 + 550 + switch (format) { 551 + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: 552 + case VC4_RENDER_CONFIG_FORMAT_BGR565: 553 + cpp = 2; 554 + break; 555 + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: 556 + cpp = 4; 557 + break; 558 + default: 559 + DRM_ERROR("Bad tile buffer format\n"); 560 + return -EINVAL; 561 + } 562 + 563 + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, 564 + exec->args->width, exec->args->height, cpp)) { 565 + return -EINVAL; 566 + } 567 + 568 + return 0; 569 + } 570 + 571 + int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) 572 + { 573 + struct vc4_rcl_setup setup = {0}; 574 + struct drm_vc4_submit_cl *args = exec->args; 575 + bool has_bin = args->bin_cl_size != 0; 576 + int ret; 577 + 578 + if (args->min_x_tile > args->max_x_tile || 579 + args->min_y_tile > args->max_y_tile) { 580 + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", 581 + args->min_x_tile, args->min_y_tile, 582 + args->max_x_tile, args->max_y_tile); 583 + return -EINVAL; 584 + } 585 + 586 + if (has_bin && 587 + (args->max_x_tile > exec->bin_tiles_x || 588 + args->max_y_tile > exec->bin_tiles_y)) { 589 + DRM_ERROR("Render tiles (%d,%d) outside of bin config " 590 + "(%d,%d)\n", 591 + args->max_x_tile, args->max_y_tile, 592 + exec->bin_tiles_x, exec->bin_tiles_y); 593 + return -EINVAL; 594 + } 595 + 596 + ret = vc4_rcl_render_config_surface_setup(exec, &setup, 597 + &setup.color_write, 598 + &args->color_write); 599 + if (ret) 600 + return ret; 601 + 602 + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); 603 + if (ret) 604 + return ret; 605 + 606 + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); 607 + if (ret) 608 + return ret; 609 + 610 + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); 611 + if (ret) 612 + return 
ret; 613 + 614 + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write, 615 + &args->msaa_color_write); 616 + if (ret) 617 + return ret; 618 + 619 + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write, 620 + &args->msaa_zs_write); 621 + if (ret) 622 + return ret; 623 + 624 + /* We shouldn't even have the job submitted to us if there's no 625 + * surface to write out. 626 + */ 627 + if (!setup.color_write && !setup.zs_write && 628 + !setup.msaa_color_write && !setup.msaa_zs_write) { 629 + DRM_ERROR("RCL requires color or Z/S write\n"); 630 + return -EINVAL; 631 + } 632 + 633 + return vc4_create_rcl_bo(dev, exec, &setup); 634 + }
+63
drivers/gpu/drm/vc4/vc4_trace.h
/*
 * Copyright (C) 2015 Broadcom
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* Tracepoint definitions for the vc4 driver.  This header is included
 * twice (TRACE_HEADER_MULTI_READ), once normally and once by
 * vc4_trace_points.c with CREATE_TRACE_POINTS defined to instantiate
 * the events.
 */
#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _VC4_TRACE_H_

#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM vc4
#define TRACE_INCLUDE_FILE vc4_trace

/* Emitted when a caller starts blocking on completion of a GPU seqno. */
TRACE_EVENT(vc4_wait_for_seqno_begin,
	    TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
	    TP_ARGS(dev, seqno, timeout),

	    TP_STRUCT__entry(
			     __field(u32, dev)
			     __field(u64, seqno)
			     __field(u64, timeout)
			     ),

	    TP_fast_assign(
			   __entry->dev = dev->primary->index;
			   __entry->seqno = seqno;
			   __entry->timeout = timeout;
			   ),

	    TP_printk("dev=%u, seqno=%llu, timeout=%llu",
		      __entry->dev, __entry->seqno, __entry->timeout)
);

/* Emitted when the wait started by vc4_wait_for_seqno_begin finishes. */
TRACE_EVENT(vc4_wait_for_seqno_end,
	    TP_PROTO(struct drm_device *dev, uint64_t seqno),
	    TP_ARGS(dev, seqno),

	    TP_STRUCT__entry(
			     __field(u32, dev)
			     __field(u64, seqno)
			     ),

	    TP_fast_assign(
			   __entry->dev = dev->primary->index;
			   __entry->seqno = seqno;
			   ),

	    TP_printk("dev=%u, seqno=%llu",
		      __entry->dev, __entry->seqno)
);

#endif /* _VC4_TRACE_H_ */

/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>
+14
drivers/gpu/drm/vc4/vc4_trace_points.c
/*
 * Copyright (C) 2015 Broadcom
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include "vc4_drv.h"

/* Instantiate the vc4 tracepoints declared in vc4_trace.h.  Sparse
 * (__CHECKER__) can't cope with the tracepoint macro expansion, so the
 * instantiation is skipped for static analysis builds.
 */
#ifndef __CHECKER__
#define CREATE_TRACE_POINTS
#include "vc4_trace.h"
#endif
+37
drivers/gpu/drm/vc4/vc4_v3d.c
··· 144 144 } 145 145 #endif /* CONFIG_DEBUG_FS */ 146 146 147 + /* 148 + * Asks the firmware to turn on power to the V3D engine. 149 + * 150 + * This may be doable with just the clocks interface, though this 151 + * packet does some other register setup from the firmware, too. 152 + */ 153 + int 154 + vc4_v3d_set_power(struct vc4_dev *vc4, bool on) 155 + { 156 + if (on) 157 + return pm_generic_poweroff(&vc4->v3d->pdev->dev); 158 + else 159 + return pm_generic_resume(&vc4->v3d->pdev->dev); 160 + } 161 + 147 162 static void vc4_v3d_init_hw(struct drm_device *dev) 148 163 { 149 164 struct vc4_dev *vc4 = to_vc4_dev(dev); ··· 176 161 struct drm_device *drm = dev_get_drvdata(master); 177 162 struct vc4_dev *vc4 = to_vc4_dev(drm); 178 163 struct vc4_v3d *v3d = NULL; 164 + int ret; 179 165 180 166 v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); 181 167 if (!v3d) ··· 196 180 return -EINVAL; 197 181 } 198 182 183 + /* Reset the binner overflow address/size at setup, to be sure 184 + * we don't reuse an old one. 185 + */ 186 + V3D_WRITE(V3D_BPOA, 0); 187 + V3D_WRITE(V3D_BPOS, 0); 188 + 199 189 vc4_v3d_init_hw(drm); 190 + 191 + ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); 192 + if (ret) { 193 + DRM_ERROR("Failed to install IRQ handler\n"); 194 + return ret; 195 + } 200 196 201 197 return 0; 202 198 } ··· 218 190 { 219 191 struct drm_device *drm = dev_get_drvdata(master); 220 192 struct vc4_dev *vc4 = to_vc4_dev(drm); 193 + 194 + drm_irq_uninstall(drm); 195 + 196 + /* Disable the binner's overflow memory address, so the next 197 + * driver probe (if any) doesn't try to reuse our old 198 + * allocation. 199 + */ 200 + V3D_WRITE(V3D_BPOA, 0); 201 + V3D_WRITE(V3D_BPOS, 0); 221 202 222 203 vc4->v3d = NULL; 223 204 }
+900
drivers/gpu/drm/vc4/vc4_validate.c
··· 1 + /* 2 + * Copyright © 2014 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + */ 23 + 24 + /** 25 + * Command list validator for VC4. 26 + * 27 + * The VC4 has no IOMMU between it and system memory. So, a user with 28 + * access to execute command lists could escalate privilege by 29 + * overwriting system memory (drawing to it as a framebuffer) or 30 + * reading system memory it shouldn't (reading it as a texture, or 31 + * uniform data, or vertex data). 32 + * 33 + * This validates command lists to ensure that all accesses are within 34 + * the bounds of the GEM objects referenced. It explicitly whitelists 35 + * packets, and looks at the offsets in any address fields to make 36 + * sure they're constrained within the BOs they reference. 
 *
 * Note that because of the validation that's happening anyway, this
 * is where GEM relocation processing happens.
 */

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_packet.h"

/* Common parameter list shared by all bin CL packet validation handlers. */
#define VALIDATE_ARGS \
	struct vc4_exec_info *exec,			\
	void *validated,				\
	void *untrusted

/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
	switch (cpp) {
	case 1:
	case 2:
		return 8;
	case 4:
		return 4;
	case 8:
		return 2;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}

/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
	switch (cpp) {
	case 1:
		return 8;
	case 2:
	case 4:
	case 8:
		return 4;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}

/**
 * The texture unit decides what tiling format a particular miplevel is
 * using this function, so we lay out our miptrees accordingly.
 * (A surface small enough in either dimension is LT-format rather than
 * T-format.)
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	return (width <= 4 * utile_width(cpp) ||
		height <= 4 * utile_height(cpp));
}

/* Looks up the BO for a user-supplied handle index, rejecting shader BOs
 * (those may only be referenced as shaders).  Returns NULL on any failure.
 */
struct drm_gem_cma_object *
vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
	struct drm_gem_cma_object *obj;
	struct vc4_bo *bo;

	if (hindex >= exec->bo_count) {
		DRM_ERROR("BO index %d greater than BO count %d\n",
			  hindex, exec->bo_count);
		return NULL;
	}
	obj = exec->bo[hindex];
	bo = to_vc4_bo(&obj->base);

	if (bo->validated_shader) {
		DRM_ERROR("Trying to use shader BO as something other than "
			  "a shader\n");
		return NULL;
	}

	return obj;
}

/* Convenience wrapper: looks up a BO through the handle slot loaded by the
 * most recent VC4_PACKET_GEM_HANDLES packet.
 */
static struct drm_gem_cma_object *
vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
{
	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}

/* Returns true if the current packet starts at byte offset "pos" within
 * the user's bin CL.
 */
static bool
validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
	/* Note that the untrusted pointer passed to these functions is
	 * incremented past the packet byte.
	 */
	return (untrusted - 1 == exec->bin_u + pos);
}

/* Computes the size of a GL shader record from its low pointer bits
 * (3 bits of attribute count, 1 "extended" flag), mirroring the
 * hardware's decoding.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t attribute_count = pointer_bits & 7;
	bool extended = pointer_bits & 8;

	if (attribute_count == 0)
		attribute_count = 8;

	if (extended)
		return 100 + attribute_count * 4;
	else
		return 36 + attribute_count * 8;
}

/* Checks that a width x height surface at "offset" in "fbo", laid out
 * with the given tiling format and bytes-per-pixel, fits within the BO.
 */
bool
vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
		   uint32_t offset, uint8_t tiling_format,
		   uint32_t width, uint32_t height, uint8_t cpp)
{
	uint32_t aligned_width, aligned_height, stride, size;
	uint32_t utile_w = utile_width(cpp);
	uint32_t utile_h = utile_height(cpp);

	/* The shaded vertex format stores signed 12.4 fixed point
	 * (-2048,2047) offsets from the viewport center, so we should
	 * never have a render target larger than 4096.  The texture
	 * unit can only sample from 2048x2048, so it's even more
	 * restricted.  This lets us avoid worrying about overflow in
	 * our math.
165 + */ 166 + if (width > 4096 || height > 4096) { 167 + DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); 168 + return false; 169 + } 170 + 171 + switch (tiling_format) { 172 + case VC4_TILING_FORMAT_LINEAR: 173 + aligned_width = round_up(width, utile_w); 174 + aligned_height = height; 175 + break; 176 + case VC4_TILING_FORMAT_T: 177 + aligned_width = round_up(width, utile_w * 8); 178 + aligned_height = round_up(height, utile_h * 8); 179 + break; 180 + case VC4_TILING_FORMAT_LT: 181 + aligned_width = round_up(width, utile_w); 182 + aligned_height = round_up(height, utile_h); 183 + break; 184 + default: 185 + DRM_ERROR("buffer tiling %d unsupported\n", tiling_format); 186 + return false; 187 + } 188 + 189 + stride = aligned_width * cpp; 190 + size = stride * aligned_height; 191 + 192 + if (size + offset < size || 193 + size + offset > fbo->base.size) { 194 + DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n", 195 + width, height, 196 + aligned_width, aligned_height, 197 + size, offset, fbo->base.size); 198 + return false; 199 + } 200 + 201 + return true; 202 + } 203 + 204 + static int 205 + validate_flush(VALIDATE_ARGS) 206 + { 207 + if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) { 208 + DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n"); 209 + return -EINVAL; 210 + } 211 + exec->found_flush = true; 212 + 213 + return 0; 214 + } 215 + 216 + static int 217 + validate_start_tile_binning(VALIDATE_ARGS) 218 + { 219 + if (exec->found_start_tile_binning_packet) { 220 + DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n"); 221 + return -EINVAL; 222 + } 223 + exec->found_start_tile_binning_packet = true; 224 + 225 + if (!exec->found_tile_binning_mode_config_packet) { 226 + DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); 227 + return -EINVAL; 228 + } 229 + 230 + return 0; 231 + } 232 + 233 + static int 234 + validate_increment_semaphore(VALIDATE_ARGS) 235 + { 236 + if (!validate_bin_pos(exec, untrusted, 
exec->args->bin_cl_size - 2)) { 237 + DRM_ERROR("Bin CL must end with " 238 + "VC4_PACKET_INCREMENT_SEMAPHORE\n"); 239 + return -EINVAL; 240 + } 241 + exec->found_increment_semaphore_packet = true; 242 + 243 + return 0; 244 + } 245 + 246 + static int 247 + validate_indexed_prim_list(VALIDATE_ARGS) 248 + { 249 + struct drm_gem_cma_object *ib; 250 + uint32_t length = *(uint32_t *)(untrusted + 1); 251 + uint32_t offset = *(uint32_t *)(untrusted + 5); 252 + uint32_t max_index = *(uint32_t *)(untrusted + 9); 253 + uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1; 254 + struct vc4_shader_state *shader_state; 255 + 256 + /* Check overflow condition */ 257 + if (exec->shader_state_count == 0) { 258 + DRM_ERROR("shader state must precede primitives\n"); 259 + return -EINVAL; 260 + } 261 + shader_state = &exec->shader_state[exec->shader_state_count - 1]; 262 + 263 + if (max_index > shader_state->max_index) 264 + shader_state->max_index = max_index; 265 + 266 + ib = vc4_use_handle(exec, 0); 267 + if (!ib) 268 + return -EINVAL; 269 + 270 + if (offset > ib->base.size || 271 + (ib->base.size - offset) / index_size < length) { 272 + DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", 273 + offset, length, index_size, ib->base.size); 274 + return -EINVAL; 275 + } 276 + 277 + *(uint32_t *)(validated + 5) = ib->paddr + offset; 278 + 279 + return 0; 280 + } 281 + 282 + static int 283 + validate_gl_array_primitive(VALIDATE_ARGS) 284 + { 285 + uint32_t length = *(uint32_t *)(untrusted + 1); 286 + uint32_t base_index = *(uint32_t *)(untrusted + 5); 287 + uint32_t max_index; 288 + struct vc4_shader_state *shader_state; 289 + 290 + /* Check overflow condition */ 291 + if (exec->shader_state_count == 0) { 292 + DRM_ERROR("shader state must precede primitives\n"); 293 + return -EINVAL; 294 + } 295 + shader_state = &exec->shader_state[exec->shader_state_count - 1]; 296 + 297 + if (length + base_index < length) { 298 + DRM_ERROR("primitive vertex count overflow\n"); 299 + 
return -EINVAL; 300 + } 301 + max_index = length + base_index - 1; 302 + 303 + if (max_index > shader_state->max_index) 304 + shader_state->max_index = max_index; 305 + 306 + return 0; 307 + } 308 + 309 + static int 310 + validate_gl_shader_state(VALIDATE_ARGS) 311 + { 312 + uint32_t i = exec->shader_state_count++; 313 + 314 + if (i >= exec->shader_state_size) { 315 + DRM_ERROR("More requests for shader states than declared\n"); 316 + return -EINVAL; 317 + } 318 + 319 + exec->shader_state[i].addr = *(uint32_t *)untrusted; 320 + exec->shader_state[i].max_index = 0; 321 + 322 + if (exec->shader_state[i].addr & ~0xf) { 323 + DRM_ERROR("high bits set in GL shader rec reference\n"); 324 + return -EINVAL; 325 + } 326 + 327 + *(uint32_t *)validated = (exec->shader_rec_p + 328 + exec->shader_state[i].addr); 329 + 330 + exec->shader_rec_p += 331 + roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16); 332 + 333 + return 0; 334 + } 335 + 336 + static int 337 + validate_tile_binning_config(VALIDATE_ARGS) 338 + { 339 + struct drm_device *dev = exec->exec_bo->base.dev; 340 + struct vc4_bo *tile_bo; 341 + uint8_t flags; 342 + uint32_t tile_state_size, tile_alloc_size; 343 + uint32_t tile_count; 344 + 345 + if (exec->found_tile_binning_mode_config_packet) { 346 + DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); 347 + return -EINVAL; 348 + } 349 + exec->found_tile_binning_mode_config_packet = true; 350 + 351 + exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); 352 + exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); 353 + tile_count = exec->bin_tiles_x * exec->bin_tiles_y; 354 + flags = *(uint8_t *)(untrusted + 14); 355 + 356 + if (exec->bin_tiles_x == 0 || 357 + exec->bin_tiles_y == 0) { 358 + DRM_ERROR("Tile binning config of %dx%d too small\n", 359 + exec->bin_tiles_x, exec->bin_tiles_y); 360 + return -EINVAL; 361 + } 362 + 363 + if (flags & (VC4_BIN_CONFIG_DB_NON_MS | 364 + VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { 365 + DRM_ERROR("unsupported binning config flags 
0x%02x\n", flags); 366 + return -EINVAL; 367 + } 368 + 369 + /* The tile state data array is 48 bytes per tile, and we put it at 370 + * the start of a BO containing both it and the tile alloc. 371 + */ 372 + tile_state_size = 48 * tile_count; 373 + 374 + /* Since the tile alloc array will follow us, align. */ 375 + exec->tile_alloc_offset = roundup(tile_state_size, 4096); 376 + 377 + *(uint8_t *)(validated + 14) = 378 + ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | 379 + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | 380 + VC4_BIN_CONFIG_AUTO_INIT_TSDA | 381 + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, 382 + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | 383 + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, 384 + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); 385 + 386 + /* Initial block size. */ 387 + tile_alloc_size = 32 * tile_count; 388 + 389 + /* 390 + * The initial allocation gets rounded to the next 256 bytes before 391 + * the hardware starts fulfilling further allocations. 392 + */ 393 + tile_alloc_size = roundup(tile_alloc_size, 256); 394 + 395 + /* Add space for the extra allocations. This is what gets used first, 396 + * before overflow memory. It must have at least 4096 bytes, but we 397 + * want to avoid overflow memory usage if possible. 398 + */ 399 + tile_alloc_size += 1024 * 1024; 400 + 401 + tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, 402 + true); 403 + exec->tile_bo = &tile_bo->base; 404 + if (!exec->tile_bo) 405 + return -ENOMEM; 406 + list_add_tail(&tile_bo->unref_head, &exec->unref_list); 407 + 408 + /* tile alloc address. */ 409 + *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + 410 + exec->tile_alloc_offset); 411 + /* tile alloc size. */ 412 + *(uint32_t *)(validated + 4) = tile_alloc_size; 413 + /* tile state address. 
*/ 414 + *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; 415 + 416 + return 0; 417 + } 418 + 419 + static int 420 + validate_gem_handles(VALIDATE_ARGS) 421 + { 422 + memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); 423 + return 0; 424 + } 425 + 426 + #define VC4_DEFINE_PACKET(packet, func) \ 427 + [packet] = { packet ## _SIZE, #packet, func } 428 + 429 + static const struct cmd_info { 430 + uint16_t len; 431 + const char *name; 432 + int (*func)(struct vc4_exec_info *exec, void *validated, 433 + void *untrusted); 434 + } cmd_info[] = { 435 + VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), 436 + VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), 437 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush), 438 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL), 439 + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 440 + validate_start_tile_binning), 441 + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 442 + validate_increment_semaphore), 443 + 444 + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 445 + validate_indexed_prim_list), 446 + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 447 + validate_gl_array_primitive), 448 + 449 + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), 450 + 451 + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), 452 + 453 + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), 454 + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), 455 + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), 456 + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), 457 + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), 458 + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), 459 + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), 460 + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), 461 + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), 462 + /* Note: The docs say this was also 105, but it was 106 in the 463 + * initial userland code drop. 
	 */
	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),

	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
			  validate_tile_binning_config),

	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
};

/* Walks the user's bin CL at "unvalidated" packet by packet, copying each
 * whitelisted packet into the GPU-visible buffer at "validated" and running
 * its validation/relocation handler.  On success, exec->ct0ea is set to the
 * end of the validated CL.
 */
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec)
{
	uint32_t len = exec->args->bin_cl_size;
	uint32_t dst_offset = 0;
	uint32_t src_offset = 0;

	while (src_offset < len) {
		void *dst_pkt = validated + dst_offset;
		void *src_pkt = unvalidated + src_offset;
		u8 cmd = *(uint8_t *)src_pkt;
		const struct cmd_info *info;

		if (cmd >= ARRAY_SIZE(cmd_info)) {
			DRM_ERROR("0x%08x: packet %d out of bounds\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		/* Unlisted packets have a NULL name in the whitelist table. */
		info = &cmd_info[cmd];
		if (!info->name) {
			DRM_ERROR("0x%08x: packet %d invalid\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		if (src_offset + info->len > len) {
			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
				  "exceeds bounds (0x%08x)\n",
				  src_offset, cmd, info->name, info->len,
				  src_offset + len);
			return -EINVAL;
		}

		if (cmd != VC4_PACKET_GEM_HANDLES)
			memcpy(dst_pkt, src_pkt, info->len);

		/* Handlers see the payload (pointer advanced past the
		 * packet opcode byte).
		 */
		if (info->func && info->func(exec,
					     dst_pkt + 1,
					     src_pkt + 1)) {
			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
				  src_offset, cmd, info->name);
			return -EINVAL;
		}

		src_offset += info->len;
		/* GEM handle loading doesn't produce HW packets. */
		if (cmd != VC4_PACKET_GEM_HANDLES)
			dst_offset += info->len;

		/* When the CL hits halt, it'll stop reading anything else.
		 */
		if (cmd == VC4_PACKET_HALT)
			break;
	}

	exec->ct0ea = exec->ct0ca + dst_offset;

	if (!exec->found_start_tile_binning_packet) {
		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
		return -EINVAL;
	}

	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
	 * semaphore is used to trigger the render CL to start up, and the
	 * FLUSH is what caps the bin lists with
	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
	 * render CL when they get called to) and actually triggers the queued
	 * semaphore increment.
	 */
	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
			  "VC4_PACKET_FLUSH\n");
		return -EINVAL;
	}

	return 0;
}

/* Validates and relocates one texture sample's p0-p3 uniform words.
 * Bounds-checks the referenced texture BO (including all mipmap levels,
 * which live below the base level's offset) and patches the validated
 * uniform stream's p0 with the BO's physical address.  Returns false on
 * any validation failure.
 */
static bool
reloc_tex(struct vc4_exec_info *exec,
	  void *uniform_data_u,
	  struct vc4_texture_sample_info *sample,
	  uint32_t texture_handle_index)

{
	struct drm_gem_cma_object *tex;
	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
	/* p2/p3 are optional; ~0 in p_offset marks them as absent. */
	uint32_t p2 = (sample->p_offset[2] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
	uint32_t p3 = (sample->p_offset[3] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
	uint32_t cpp, tiling_format, utile_w, utile_h;
	uint32_t i;
	uint32_t cube_map_stride = 0;
	enum vc4_texture_data_type type;

	tex = vc4_use_bo(exec, texture_handle_index);
	if (!tex)
		return false;

	/* Direct-addressed (UBO-style) lookups only need an offset/clamp
	 * bounds check, no tiling math.
	 */
	if (sample->is_direct) {
		uint32_t remaining_size = tex->base.size - p0;

		if (p0 > tex->base.size - 4) {
			DRM_ERROR("UBO offset greater than UBO size\n");
			goto fail;
		}
		if (p1 > remaining_size - 4) {
			DRM_ERROR("UBO clamp would allow reads "
				  "outside of UBO\n");
			goto fail;
		}
		*validated_p0 = tex->paddr + p0;
		return true;
	}

	/* Hardware encodes 2048 as 0 in the width/height fields. */
	if (width == 0)
		width = 2048;
	if (height == 0)
		height = 2048;

	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
			if (cube_map_stride) {
				DRM_ERROR("Cube map stride set twice\n");
				goto fail;
			}

			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
		}
		if (!cube_map_stride) {
			DRM_ERROR("Cube map stride not set\n");
			goto fail;
		}
	}

	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));

	switch (type) {
	case VC4_TEXTURE_TYPE_RGBA8888:
	case VC4_TEXTURE_TYPE_RGBX8888:
	case VC4_TEXTURE_TYPE_RGBA32R:
		cpp = 4;
		break;
	case VC4_TEXTURE_TYPE_RGBA4444:
	case VC4_TEXTURE_TYPE_RGBA5551:
	case VC4_TEXTURE_TYPE_RGB565:
	case VC4_TEXTURE_TYPE_LUMALPHA:
	case VC4_TEXTURE_TYPE_S16F:
	case VC4_TEXTURE_TYPE_S16:
		cpp = 2;
		break;
	case VC4_TEXTURE_TYPE_LUMINANCE:
	case VC4_TEXTURE_TYPE_ALPHA:
	case VC4_TEXTURE_TYPE_S8:
		cpp = 1;
		break;
	case VC4_TEXTURE_TYPE_ETC1:
	case VC4_TEXTURE_TYPE_BW1:
	case VC4_TEXTURE_TYPE_A4:
	case VC4_TEXTURE_TYPE_A1:
	case VC4_TEXTURE_TYPE_RGBA64:
	case VC4_TEXTURE_TYPE_YUV422R:
	default:
		DRM_ERROR("Texture format %d unsupported\n", type);
		goto fail;
	}
	utile_w = utile_width(cpp);
	utile_h = utile_height(cpp);

	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
		tiling_format = VC4_TILING_FORMAT_LINEAR;
	} else {
		if (size_is_lt(width, height, cpp))
			tiling_format = VC4_TILING_FORMAT_LT;
		else
			tiling_format = VC4_TILING_FORMAT_T;
	}

	/* Cube maps are six faces at cube_map_stride apart; check through
	 * the start of the last face (offset + 5 strides).
	 */
	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
				tiling_format, width, height, cpp)) {
		goto fail;
	}

	/* The mipmap levels are stored before the base of the texture.  Make
	 * sure there is actually space in the BO.
	 */
	for (i = 1; i <= miplevels; i++) {
		uint32_t level_width = max(width >> i, 1u);
		uint32_t level_height = max(height >> i, 1u);
		uint32_t aligned_width, aligned_height;
		uint32_t level_size;

		/* Once the levels get small enough, they drop from T to LT. */
		if (tiling_format == VC4_TILING_FORMAT_T &&
		    size_is_lt(level_width, level_height, cpp)) {
			tiling_format = VC4_TILING_FORMAT_LT;
		}

		switch (tiling_format) {
		case VC4_TILING_FORMAT_T:
			aligned_width = round_up(level_width, utile_w * 8);
			aligned_height = round_up(level_height, utile_h * 8);
			break;
		case VC4_TILING_FORMAT_LT:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = round_up(level_height, utile_h);
			break;
		default:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = level_height;
			break;
		}

		level_size = aligned_width * cpp * aligned_height;

		if (offset < level_size) {
			DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
				  "overflowed buffer bounds (offset %d)\n",
				  i, level_width, level_height,
				  aligned_width, aligned_height,
				  level_size, offset);
			goto fail;
		}

		offset -= level_size;
	}

	*validated_p0 = tex->paddr + p0;

	return true;
fail:
	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
	return false;
}

static int
validate_gl_shader_rec(struct drm_device *dev,
		       struct vc4_exec_info *exec,
		       struct vc4_shader_state *state)
{
	uint32_t *src_handles;
	void *pkt_u, *pkt_v;
	/* Byte offsets of the fs/vs/cs shader pointers within the rec. */
	static const uint32_t shader_reloc_offsets[] = {
		4, /* fs */
		16, /* vs */
		28, /* cs */
	};
	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
	struct drm_gem_cma_object *bo[shader_reloc_count + 8];
	uint32_t nr_attributes, nr_relocs, packet_size;
	int i;

	nr_attributes = state->addr & 0x7;
	if
(nr_attributes == 0) 745 + nr_attributes = 8; 746 + packet_size = gl_shader_rec_size(state->addr); 747 + 748 + nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes; 749 + if (nr_relocs * 4 > exec->shader_rec_size) { 750 + DRM_ERROR("overflowed shader recs reading %d handles " 751 + "from %d bytes left\n", 752 + nr_relocs, exec->shader_rec_size); 753 + return -EINVAL; 754 + } 755 + src_handles = exec->shader_rec_u; 756 + exec->shader_rec_u += nr_relocs * 4; 757 + exec->shader_rec_size -= nr_relocs * 4; 758 + 759 + if (packet_size > exec->shader_rec_size) { 760 + DRM_ERROR("overflowed shader recs copying %db packet " 761 + "from %d bytes left\n", 762 + packet_size, exec->shader_rec_size); 763 + return -EINVAL; 764 + } 765 + pkt_u = exec->shader_rec_u; 766 + pkt_v = exec->shader_rec_v; 767 + memcpy(pkt_v, pkt_u, packet_size); 768 + exec->shader_rec_u += packet_size; 769 + /* Shader recs have to be aligned to 16 bytes (due to the attribute 770 + * flags being in the low bytes), so round the next validated shader 771 + * rec address up. This should be safe, since we've got so many 772 + * relocations in a shader rec packet. 
773 + */ 774 + BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4); 775 + exec->shader_rec_v += roundup(packet_size, 16); 776 + exec->shader_rec_size -= packet_size; 777 + 778 + if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) { 779 + DRM_ERROR("Multi-threaded fragment shaders not supported.\n"); 780 + return -EINVAL; 781 + } 782 + 783 + for (i = 0; i < shader_reloc_count; i++) { 784 + if (src_handles[i] > exec->bo_count) { 785 + DRM_ERROR("Shader handle %d too big\n", src_handles[i]); 786 + return -EINVAL; 787 + } 788 + 789 + bo[i] = exec->bo[src_handles[i]]; 790 + if (!bo[i]) 791 + return -EINVAL; 792 + } 793 + for (i = shader_reloc_count; i < nr_relocs; i++) { 794 + bo[i] = vc4_use_bo(exec, src_handles[i]); 795 + if (!bo[i]) 796 + return -EINVAL; 797 + } 798 + 799 + for (i = 0; i < shader_reloc_count; i++) { 800 + struct vc4_validated_shader_info *validated_shader; 801 + uint32_t o = shader_reloc_offsets[i]; 802 + uint32_t src_offset = *(uint32_t *)(pkt_u + o); 803 + uint32_t *texture_handles_u; 804 + void *uniform_data_u; 805 + uint32_t tex; 806 + 807 + *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; 808 + 809 + if (src_offset != 0) { 810 + DRM_ERROR("Shaders must be at offset 0 of " 811 + "the BO.\n"); 812 + return -EINVAL; 813 + } 814 + 815 + validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; 816 + if (!validated_shader) 817 + return -EINVAL; 818 + 819 + if (validated_shader->uniforms_src_size > 820 + exec->uniforms_size) { 821 + DRM_ERROR("Uniforms src buffer overflow\n"); 822 + return -EINVAL; 823 + } 824 + 825 + texture_handles_u = exec->uniforms_u; 826 + uniform_data_u = (texture_handles_u + 827 + validated_shader->num_texture_samples); 828 + 829 + memcpy(exec->uniforms_v, uniform_data_u, 830 + validated_shader->uniforms_size); 831 + 832 + for (tex = 0; 833 + tex < validated_shader->num_texture_samples; 834 + tex++) { 835 + if (!reloc_tex(exec, 836 + uniform_data_u, 837 + &validated_shader->texture_samples[tex], 
838 + texture_handles_u[tex])) { 839 + return -EINVAL; 840 + } 841 + } 842 + 843 + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; 844 + 845 + exec->uniforms_u += validated_shader->uniforms_src_size; 846 + exec->uniforms_v += validated_shader->uniforms_size; 847 + exec->uniforms_p += validated_shader->uniforms_size; 848 + } 849 + 850 + for (i = 0; i < nr_attributes; i++) { 851 + struct drm_gem_cma_object *vbo = 852 + bo[ARRAY_SIZE(shader_reloc_offsets) + i]; 853 + uint32_t o = 36 + i * 8; 854 + uint32_t offset = *(uint32_t *)(pkt_u + o + 0); 855 + uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; 856 + uint32_t stride = *(uint8_t *)(pkt_u + o + 5); 857 + uint32_t max_index; 858 + 859 + if (state->addr & 0x8) 860 + stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; 861 + 862 + if (vbo->base.size < offset || 863 + vbo->base.size - offset < attr_size) { 864 + DRM_ERROR("BO offset overflow (%d + %d > %d)\n", 865 + offset, attr_size, vbo->base.size); 866 + return -EINVAL; 867 + } 868 + 869 + if (stride != 0) { 870 + max_index = ((vbo->base.size - offset - attr_size) / 871 + stride); 872 + if (state->max_index > max_index) { 873 + DRM_ERROR("primitives use index %d out of " 874 + "supplied %d\n", 875 + state->max_index, max_index); 876 + return -EINVAL; 877 + } 878 + } 879 + 880 + *(uint32_t *)(pkt_v + o) = vbo->paddr + offset; 881 + } 882 + 883 + return 0; 884 + } 885 + 886 + int 887 + vc4_validate_shader_recs(struct drm_device *dev, 888 + struct vc4_exec_info *exec) 889 + { 890 + uint32_t i; 891 + int ret = 0; 892 + 893 + for (i = 0; i < exec->shader_state_count; i++) { 894 + ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); 895 + if (ret) 896 + return ret; 897 + } 898 + 899 + return ret; 900 + }
+141
include/uapi/drm/vc4_drm.h
··· 26 26 27 27 #include "drm.h" 28 28 29 + #define DRM_VC4_SUBMIT_CL 0x00 30 + #define DRM_VC4_WAIT_SEQNO 0x01 31 + #define DRM_VC4_WAIT_BO 0x02 29 32 #define DRM_VC4_CREATE_BO 0x03 30 33 #define DRM_VC4_MMAP_BO 0x04 31 34 #define DRM_VC4_CREATE_SHADER_BO 0x05 32 35 36 + #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) 37 + #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) 38 + #define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) 33 39 #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) 34 40 #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) 35 41 #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) 42 + 43 + struct drm_vc4_submit_rcl_surface { 44 + __u32 hindex; /* Handle index, or ~0 if not present. */ 45 + __u32 offset; /* Offset to start of buffer. */ 46 + /* 47 + * Bits for either render config (color_write) or load/store packet. 48 + * Bits should all be 0 for MSAA load/stores. 49 + */ 50 + __u16 bits; 51 + 52 + #define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) 53 + __u16 flags; 54 + }; 55 + 56 + /** 57 + * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D 58 + * engine. 59 + * 60 + * Drivers typically use GPU BOs to store batchbuffers / command lists and 61 + * their associated state. However, because the VC4 lacks an MMU, we have to 62 + * do validation of memory accesses by the GPU commands. If we were to store 63 + * our commands in BOs, we'd need to do uncached readback from them to do the 64 + * validation process, which is too expensive. 
Instead, userspace accumulates 65 + * commands and associated state in plain memory, then the kernel copies the 66 + * data to its own address space, and then validates and stores it in a GPU 67 + * BO. 68 + */ 69 + struct drm_vc4_submit_cl { 70 + /* Pointer to the binner command list. 71 + * 72 + * This is the first set of commands executed, which runs the 73 + * coordinate shader to determine where primitives land on the screen, 74 + * then writes out the state updates and draw calls necessary per tile 75 + * to the tile allocation BO. 76 + */ 77 + __u64 bin_cl; 78 + 79 + /* Pointer to the shader records. 80 + * 81 + * Shader records are the structures read by the hardware that contain 82 + * pointers to uniforms, shaders, and vertex attributes. The 83 + * reference to the shader record has enough information to determine 84 + * how many pointers are necessary (fixed number for shaders/uniforms, 85 + * and an attribute count), so those BO indices into bo_handles are 86 + * just stored as __u32s before each shader record passed in. 87 + */ 88 + __u64 shader_rec; 89 + 90 + /* Pointer to uniform data and texture handles for the textures 91 + * referenced by the shader. 92 + * 93 + * For each shader state record, there is a set of uniform data in the 94 + * order referenced by the record (FS, VS, then CS). Each set of 95 + * uniform data has a __u32 index into bo_handles per texture 96 + * sample operation, in the order the QPU_W_TMUn_S writes appear in 97 + * the program. Following the texture BO handle indices is the actual 98 + * uniform data. 99 + * 100 + * The individual uniform state blocks don't have sizes passed in, 101 + * because the kernel has to determine the sizes anyway during shader 102 + * code validation. 103 + */ 104 + __u64 uniforms; 105 + __u64 bo_handles; 106 + 107 + /* Size in bytes of the binner command list. */ 108 + __u32 bin_cl_size; 109 + /* Size in bytes of the set of shader records. 
*/ 110 + __u32 shader_rec_size; 111 + /* Number of shader records. 112 + * 113 + * This could just be computed from the contents of shader_records and 114 + * the address bits of references to them from the bin CL, but it 115 + * keeps the kernel from having to resize some allocations it makes. 116 + */ 117 + __u32 shader_rec_count; 118 + /* Size in bytes of the uniform state. */ 119 + __u32 uniforms_size; 120 + 121 + /* Number of BO handles passed in (size is that times 4). */ 122 + __u32 bo_handle_count; 123 + 124 + /* RCL setup: */ 125 + __u16 width; 126 + __u16 height; 127 + __u8 min_x_tile; 128 + __u8 min_y_tile; 129 + __u8 max_x_tile; 130 + __u8 max_y_tile; 131 + struct drm_vc4_submit_rcl_surface color_read; 132 + struct drm_vc4_submit_rcl_surface color_write; 133 + struct drm_vc4_submit_rcl_surface zs_read; 134 + struct drm_vc4_submit_rcl_surface zs_write; 135 + struct drm_vc4_submit_rcl_surface msaa_color_write; 136 + struct drm_vc4_submit_rcl_surface msaa_zs_write; 137 + __u32 clear_color[2]; 138 + __u32 clear_z; 139 + __u8 clear_s; 140 + 141 + __u32 pad:24; 142 + 143 + #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) 144 + __u32 flags; 145 + 146 + /* Returned value of the seqno of this render job (for the 147 + * wait ioctl). 148 + */ 149 + __u64 seqno; 150 + }; 151 + 152 + /** 153 + * struct drm_vc4_wait_seqno - ioctl argument for waiting for 154 + * DRM_VC4_SUBMIT_CL completion using its returned seqno. 155 + * 156 + * timeout_ns is the timeout in nanoseconds, where "0" means "don't 157 + * block, just return the status." 158 + */ 159 + struct drm_vc4_wait_seqno { 160 + __u64 seqno; 161 + __u64 timeout_ns; 162 + }; 163 + 164 + /** 165 + * struct drm_vc4_wait_bo - ioctl argument for waiting for 166 + * completion of the last DRM_VC4_SUBMIT_CL on a BO. 167 + * 168 + * This is useful for cases where multiple processes might be 169 + * rendering to a BO and you want to wait for all rendering to be 170 + * completed. 
171 + */ 172 + struct drm_vc4_wait_bo { 173 + __u32 handle; 174 + __u32 pad; 175 + __u64 timeout_ns; 176 + }; 36 177 37 178 /** 38 179 * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.