Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux into drm-next

This pull request covers what's left for 4.6. Notably, it includes a
significant 3D performance improvement and a fix to HDMI hotplug
detection for the Pi2/3.

* tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux:
drm/vc4: Recognize a more specific compatible string for V3D.
dt-bindings: Add binding docs for V3D.
drm/vc4: Return -EFAULT on copy_from_user() failure
drm/vc4: Respect GPIO_ACTIVE_LOW on HDMI HPD if set in the devicetree.
drm/vc4: Let gpiolib know that we're OK with sleeping for HPD.
drm/vc4: improve throughput by pipelining binning and rendering jobs

+194 -58
+12
Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
··· 35 35 as an interrupt/status bit in the HDMI controller 36 36 itself). See bindings/pinctrl/brcm,bcm2835-gpio.txt 37 37 38 + Required properties for V3D: 39 + - compatible: Should be "brcm,bcm2835-v3d" 40 + - reg: Physical base address and length of the V3D's registers 41 + - interrupts: The interrupt number 42 + See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt 43 + 38 44 Example: 39 45 pixelvalve@7e807000 { 40 46 compatible = "brcm,bcm2835-pixelvalve2"; ··· 64 58 clocks = <&clocks BCM2835_PLLH_PIX>, 65 59 <&clocks BCM2835_CLOCK_HSM>; 66 60 clock-names = "pixel", "hdmi"; 61 + }; 62 + 63 + v3d: v3d@7ec00000 { 64 + compatible = "brcm,bcm2835-v3d"; 65 + reg = <0x7ec00000 0x1000>; 66 + interrupts = <1 10>; 67 67 }; 68 68 69 69 vc4: gpu {
+4 -3
drivers/gpu/drm/vc4/vc4_bo.c
··· 499 499 if (IS_ERR(bo)) 500 500 return PTR_ERR(bo); 501 501 502 - ret = copy_from_user(bo->base.vaddr, 502 + if (copy_from_user(bo->base.vaddr, 503 503 (void __user *)(uintptr_t)args->data, 504 - args->size); 505 - if (ret != 0) 504 + args->size)) { 505 + ret = -EFAULT; 506 506 goto fail; 507 + } 507 508 /* Clear the rest of the memory from allocating from the BO 508 509 * cache. 509 510 */
+28 -9
drivers/gpu/drm/vc4/vc4_drv.h
··· 52 52 /* Protects bo_cache and the BO stats. */ 53 53 struct mutex bo_lock; 54 54 55 - /* Sequence number for the last job queued in job_list. 55 + /* Sequence number for the last job queued in bin_job_list. 56 56 * Starts at 0 (no jobs emitted). 57 57 */ 58 58 uint64_t emit_seqno; ··· 62 62 */ 63 63 uint64_t finished_seqno; 64 64 65 - /* List of all struct vc4_exec_info for jobs to be executed. 66 - * The first job in the list is the one currently programmed 67 - * into ct0ca/ct1ca for execution. 65 + /* List of all struct vc4_exec_info for jobs to be executed in 66 + * the binner. The first job in the list is the one currently 67 + * programmed into ct0ca for execution. 68 68 */ 69 - struct list_head job_list; 69 + struct list_head bin_job_list; 70 + 71 + /* List of all struct vc4_exec_info for jobs that have 72 + * completed binning and are ready for rendering. The first 73 + * job in the list is the one currently programmed into ct1ca 74 + * for execution. 75 + */ 76 + struct list_head render_job_list; 77 + 70 78 /* List of the finished vc4_exec_infos waiting to be freed by 71 79 * job_done_work. 72 80 */ ··· 304 296 }; 305 297 306 298 static inline struct vc4_exec_info * 307 - vc4_first_job(struct vc4_dev *vc4) 299 + vc4_first_bin_job(struct vc4_dev *vc4) 308 300 { 309 - if (list_empty(&vc4->job_list)) 301 + if (list_empty(&vc4->bin_job_list)) 310 302 return NULL; 311 - return list_first_entry(&vc4->job_list, struct vc4_exec_info, head); 303 + return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head); 304 + } 305 + 306 + static inline struct vc4_exec_info * 307 + vc4_first_render_job(struct vc4_dev *vc4) 308 + { 309 + if (list_empty(&vc4->render_job_list)) 310 + return NULL; 311 + return list_first_entry(&vc4->render_job_list, 312 + struct vc4_exec_info, head); 312 313 } 313 314 314 315 /** ··· 431 414 struct drm_file *file_priv); 432 415 int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 433 416 struct drm_file *file_priv); 434 - void vc4_submit_next_job(struct drm_device *dev); 417 + void vc4_submit_next_bin_job(struct drm_device *dev); 418 + void vc4_submit_next_render_job(struct drm_device *dev); 419 + void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec); 435 420 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, 436 421 uint64_t timeout_ns, bool interruptible); 437 422 void vc4_job_handle_completed(struct vc4_dev *vc4);
+90 -35
drivers/gpu/drm/vc4/vc4_gem.c
··· 141 141 struct vc4_dev *vc4 = to_vc4_dev(dev); 142 142 struct drm_vc4_get_hang_state *state; 143 143 struct vc4_hang_state *kernel_state; 144 - struct vc4_exec_info *exec; 144 + struct vc4_exec_info *exec[2]; 145 145 struct vc4_bo *bo; 146 146 unsigned long irqflags; 147 - unsigned int i, unref_list_count; 147 + unsigned int i, j, unref_list_count, prev_idx; 148 148 149 149 kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); 150 150 if (!kernel_state) ··· 153 153 state = &kernel_state->user_state; 154 154 155 155 spin_lock_irqsave(&vc4->job_lock, irqflags); 156 - exec = vc4_first_job(vc4); 157 - if (!exec) { 156 + exec[0] = vc4_first_bin_job(vc4); 157 + exec[1] = vc4_first_render_job(vc4); 158 + if (!exec[0] && !exec[1]) { 158 159 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 159 160 return; 160 161 } 161 162 162 - unref_list_count = 0; 163 - list_for_each_entry(bo, &exec->unref_list, unref_head) 164 - unref_list_count++; 163 + /* Get the bos from both binner and renderer into hang state. */ 164 + state->bo_count = 0; 165 + for (i = 0; i < 2; i++) { 166 + if (!exec[i]) 167 + continue; 165 168 166 - state->bo_count = exec->bo_count + unref_list_count; 167 - kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo), 168 - GFP_ATOMIC); 169 + unref_list_count = 0; 170 + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) 171 + unref_list_count++; 172 + state->bo_count += exec[i]->bo_count + unref_list_count; 173 + } 174 + 175 + kernel_state->bo = kcalloc(state->bo_count, 176 + sizeof(*kernel_state->bo), GFP_ATOMIC); 177 + 169 178 if (!kernel_state->bo) { 170 179 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 171 180 return; 172 181 } 173 182 174 - for (i = 0; i < exec->bo_count; i++) { 175 - drm_gem_object_reference(&exec->bo[i]->base); 176 - kernel_state->bo[i] = &exec->bo[i]->base; 183 + prev_idx = 0; 184 + for (i = 0; i < 2; i++) { 185 + if (!exec[i]) 186 + continue; 187 + 188 + for (j = 0; j < exec[i]->bo_count; j++) { 189 + drm_gem_object_reference(&exec[i]->bo[j]->base); 190 + kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; 191 + } 192 + 193 + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { 194 + drm_gem_object_reference(&bo->base.base); 195 + kernel_state->bo[j + prev_idx] = &bo->base.base; 196 + j++; 197 + } 198 + prev_idx = j + 1; 177 199 } 178 200 179 - list_for_each_entry(bo, &exec->unref_list, unref_head) { 180 - drm_gem_object_reference(&bo->base.base); 181 - kernel_state->bo[i] = &bo->base.base; 182 - i++; 183 - } 184 - 185 - state->start_bin = exec->ct0ca; 186 - state->start_render = exec->ct1ca; 201 + if (exec[0]) 202 + state->start_bin = exec[0]->ct0ca; 203 + if (exec[1]) 204 + state->start_render = exec[1]->ct1ca; 187 205 188 206 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 189 207 ··· 285 267 struct vc4_dev *vc4 = to_vc4_dev(dev); 286 268 uint32_t ct0ca, ct1ca; 287 269 unsigned long irqflags; 288 - struct vc4_exec_info *exec; 270 + struct vc4_exec_info *bin_exec, *render_exec; 289 271 290 272 spin_lock_irqsave(&vc4->job_lock, irqflags); 291 - exec = vc4_first_job(vc4); 273 + 274 + bin_exec = vc4_first_bin_job(vc4); 275 + render_exec = vc4_first_render_job(vc4); 292 276 293 277 /* If idle, we can stop watching for hangs. */ 294 - if (!exec) { 278 + if (!bin_exec && !render_exec) { 295 279 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 296 280 return; 297 281 } ··· 304 284 /* If we've made any progress in execution, rearm the timer 305 285 * and wait. 306 286 */ 307 - if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) { 308 - exec->last_ct0ca = ct0ca; 309 - exec->last_ct1ca = ct1ca; 287 + if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || 288 + (render_exec && ct1ca != render_exec->last_ct1ca)) { 289 + if (bin_exec) 290 + bin_exec->last_ct0ca = ct0ca; 291 + if (render_exec) 292 + render_exec->last_ct1ca = ct1ca; 310 293 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 311 294 vc4_queue_hangcheck(dev); 312 295 return; ··· 409 386 * The job_lock should be held during this. 410 387 */ 411 388 void 412 - vc4_submit_next_job(struct drm_device *dev) 389 + vc4_submit_next_bin_job(struct drm_device *dev) 413 390 { 414 391 struct vc4_dev *vc4 = to_vc4_dev(dev); 415 - struct vc4_exec_info *exec = vc4_first_job(vc4); 392 + struct vc4_exec_info *exec; 416 393 394 + again: 395 + exec = vc4_first_bin_job(vc4); 417 396 if (!exec) 418 397 return; 419 398 ··· 425 400 V3D_WRITE(V3D_BPOA, 0); 426 401 V3D_WRITE(V3D_BPOS, 0); 427 402 428 - if (exec->ct0ca != exec->ct0ea) 403 + /* Either put the job in the binner if it uses the binner, or 404 + * immediately move it to the to-be-rendered queue. 405 + */ 406 + if (exec->ct0ca != exec->ct0ea) { 429 407 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); 408 + } else { 409 + vc4_move_job_to_render(dev, exec); 410 + goto again; 411 + } 412 + } 413 + 414 + void 415 + vc4_submit_next_render_job(struct drm_device *dev) 416 + { 417 + struct vc4_dev *vc4 = to_vc4_dev(dev); 418 + struct vc4_exec_info *exec = vc4_first_render_job(vc4); 419 + 420 + if (!exec) 421 + return; 422 + 430 423 submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); 424 + } 425 + 426 + void 427 + vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) 428 + { 429 + struct vc4_dev *vc4 = to_vc4_dev(dev); 430 + bool was_empty = list_empty(&vc4->render_job_list); 431 + 432 + list_move_tail(&exec->head, &vc4->render_job_list); 433 + if (was_empty) 434 + vc4_submit_next_render_job(dev); 431 435 } 432 436 433 437 static void ··· 497 443 exec->seqno = seqno; 498 444 vc4_update_bo_seqnos(exec, seqno); 499 445 500 - list_add_tail(&exec->head, &vc4->job_list); 446 + list_add_tail(&exec->head, &vc4->bin_job_list); 501 447 502 448 /* If no job was executing, kick ours off. Otherwise, it'll 503 - * get started when the previous job's frame done interrupt 449 + * get started when the previous job's flush done interrupt 504 450 * occurs. 505 451 */ 506 - if (vc4_first_job(vc4) == exec) { 507 - vc4_submit_next_job(dev); 452 + if (vc4_first_bin_job(vc4) == exec) { 453 + vc4_submit_next_bin_job(dev); 508 454 vc4_queue_hangcheck(dev); 509 455 } 510 456 ··· 913 859 { 914 860 struct vc4_dev *vc4 = to_vc4_dev(dev); 915 861 916 - INIT_LIST_HEAD(&vc4->job_list); 862 + INIT_LIST_HEAD(&vc4->bin_job_list); 863 + INIT_LIST_HEAD(&vc4->render_job_list); 917 864 INIT_LIST_HEAD(&vc4->job_done_list); 918 865 INIT_LIST_HEAD(&vc4->seqno_cb_list); 919 866 spin_lock_init(&vc4->job_lock);
+10 -2
drivers/gpu/drm/vc4/vc4_hdmi.c
··· 47 47 void __iomem *hdmicore_regs; 48 48 void __iomem *hd_regs; 49 49 int hpd_gpio; 50 + bool hpd_active_low; 50 51 51 52 struct clk *pixel_clock; 52 53 struct clk *hsm_clock; ··· 167 166 struct vc4_dev *vc4 = to_vc4_dev(dev); 168 167 169 168 if (vc4->hdmi->hpd_gpio) { 170 - if (gpio_get_value(vc4->hdmi->hpd_gpio)) 169 + if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^ 170 + vc4->hdmi->hpd_active_low) 171 171 return connector_status_connected; 172 172 else 173 173 return connector_status_disconnected; ··· 519 517 * we'll use the HDMI core's register. 520 518 */ 521 519 if (of_find_property(dev->of_node, "hpd-gpios", &value)) { 522 - hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0); 520 + enum of_gpio_flags hpd_gpio_flags; 521 + 522 + hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node, 523 + "hpd-gpios", 0, 524 + &hpd_gpio_flags); 523 525 if (hdmi->hpd_gpio < 0) { 524 526 ret = hdmi->hpd_gpio; 525 527 goto err_unprepare_hsm; 526 528 } 529 + 530 + hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; 527 531 } 528 532 529 533 vc4->hdmi = hdmi;
+49 -9
drivers/gpu/drm/vc4/vc4_irq.c
··· 30 30 * disables that specific interrupt, and 0s written are ignored 31 31 * (reading either one returns the set of enabled interrupts). 32 32 * 33 + * When we take a binning flush done interrupt, we need to submit the 34 + * next frame for binning and move the finished frame to the render 35 + * thread. 36 + * 33 37 * When we take a render frame interrupt, we need to wake the 34 38 * processes waiting for some frame to be done, and get the next frame 35 39 * submitted ASAP (so the hardware doesn't sit idle when there's work ··· 48 44 #include "vc4_regs.h" 49 45 50 46 #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ 47 + V3D_INT_FLDONE | \ 51 48 V3D_INT_FRDONE) 52 49 53 50 DECLARE_WAIT_QUEUE_HEAD(render_wait); ··· 82 77 unsigned long irqflags; 83 78 84 79 spin_lock_irqsave(&vc4->job_lock, irqflags); 85 - current_exec = vc4_first_job(vc4); 80 + current_exec = vc4_first_bin_job(vc4); 86 81 if (current_exec) { 87 82 vc4->overflow_mem->seqno = vc4->finished_seqno + 1; 88 83 list_add_tail(&vc4->overflow_mem->unref_head, ··· 103 98 } 104 99 105 100 static void 106 - vc4_irq_finish_job(struct drm_device *dev) 101 + vc4_irq_finish_bin_job(struct drm_device *dev) 107 102 { 108 103 struct vc4_dev *vc4 = to_vc4_dev(dev); 109 - struct vc4_exec_info *exec = vc4_first_job(vc4); 104 + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); 105 + 106 + if (!exec) 107 + return; 108 + 109 + vc4_move_job_to_render(dev, exec); 110 + vc4_submit_next_bin_job(dev); 111 + } 112 + 113 + static void 114 + vc4_cancel_bin_job(struct drm_device *dev) 115 + { 116 + struct vc4_dev *vc4 = to_vc4_dev(dev); 117 + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); 118 + 119 + if (!exec) 120 + return; 121 + 122 + list_move_tail(&exec->head, &vc4->bin_job_list); 123 + vc4_submit_next_bin_job(dev); 124 + } 125 + 126 + static void 127 + vc4_irq_finish_render_job(struct drm_device *dev) 128 + { 129 + struct vc4_dev *vc4 = to_vc4_dev(dev); 130 + struct vc4_exec_info *exec = vc4_first_render_job(vc4); 110 131 111 132 if (!exec) 112 133 return; 113 134 114 135 vc4->finished_seqno++; 115 136 list_move_tail(&exec->head, &vc4->job_done_list); 116 - vc4_submit_next_job(dev); 137 + vc4_submit_next_render_job(dev); 117 138 118 139 wake_up_all(&vc4->job_wait_queue); 119 140 schedule_work(&vc4->job_done_work); ··· 156 125 barrier(); 157 126 intctl = V3D_READ(V3D_INTCTL); 158 127 159 - /* Acknowledge the interrupts we're handling here. The render 160 - * frame done interrupt will be cleared, while OUTOMEM will 161 - * stay high until the underlying cause is cleared. 128 + /* Acknowledge the interrupts we're handling here. The binner 129 + * last flush / render frame done interrupt will be cleared, 130 + * while OUTOMEM will stay high until the underlying cause is 131 + * cleared. 162 132 */ 163 133 V3D_WRITE(V3D_INTCTL, intctl); 164 134 ··· 170 138 status = IRQ_HANDLED; 171 139 } 172 140 141 + if (intctl & V3D_INT_FLDONE) { 142 + spin_lock(&vc4->job_lock); 143 + vc4_irq_finish_bin_job(dev); 144 + spin_unlock(&vc4->job_lock); 145 + status = IRQ_HANDLED; 146 + } 147 + 173 148 if (intctl & V3D_INT_FRDONE) { 174 149 spin_lock(&vc4->job_lock); 175 - vc4_irq_finish_job(dev); 150 + vc4_irq_finish_render_job(dev); 176 151 spin_unlock(&vc4->job_lock); 177 152 status = IRQ_HANDLED; 178 153 } ··· 244 205 V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); 245 206 246 207 spin_lock_irqsave(&vc4->job_lock, irqflags); 247 - vc4_irq_finish_job(dev); 208 + vc4_cancel_bin_job(dev); 209 + vc4_irq_finish_render_job(dev); 248 210 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 249 211 }
+1
drivers/gpu/drm/vc4/vc4_v3d.c
··· 268 268 } 269 269 270 270 static const struct of_device_id vc4_v3d_dt_match[] = { 271 + { .compatible = "brcm,bcm2835-v3d" }, 271 272 { .compatible = "brcm,vc4-v3d" }, 272 273 {} 273 274 };