Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-vc4-next-2016-02-17' of github.com:anholt/linux into drm-next

This pull request brings in overlay plane support for vc4.

* tag 'drm-vc4-next-2016-02-17' of github.com:anholt/linux:
drm/vc4: Add support for YUV planes.
drm/vc4: Add support for a few more RGB display plane formats.
drm/vc4: Add support for scaling of display planes.
drm/vc4: Fix which value is being used for source image size.
drm/vc4: Add more display planes to each CRTC.
drm/vc4: Make the CRTCs cooperate on allocating display lists.
drm/vc4: Add a proper short-circuit path for legacy cursor updates.
drm/vc4: Move the plane clipping/scaling setup to a separate function.
drm/vc4: Add missing __iomem annotation to hw_dlist.
drm/vc4: Improve comments on vc4_plane_state members.

+869 -119
+102 -65
drivers/gpu/drm/vc4/vc4_crtc.c
··· 49 49 /* Which HVS channel we're using for our CRTC. */ 50 50 int channel; 51 51 52 - /* Pointer to the actual hardware display list memory for the 53 - * crtc. 54 - */ 55 - u32 __iomem *dlist; 56 - 57 - u32 dlist_size; /* in dwords */ 58 - 59 52 struct drm_pending_vblank_event *event; 53 + }; 54 + 55 + struct vc4_crtc_state { 56 + struct drm_crtc_state base; 57 + /* Dlist area for this CRTC configuration. */ 58 + struct drm_mm_node mm; 60 59 }; 61 60 62 61 static inline struct vc4_crtc * 63 62 to_vc4_crtc(struct drm_crtc *crtc) 64 63 { 65 64 return (struct vc4_crtc *)crtc; 65 + } 66 + 67 + static inline struct vc4_crtc_state * 68 + to_vc4_crtc_state(struct drm_crtc_state *crtc_state) 69 + { 70 + return (struct vc4_crtc_state *)crtc_state; 66 71 } 67 72 68 73 struct vc4_crtc_data { ··· 324 319 static int vc4_crtc_atomic_check(struct drm_crtc *crtc, 325 320 struct drm_crtc_state *state) 326 321 { 322 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); 327 323 struct drm_device *dev = crtc->dev; 328 324 struct vc4_dev *vc4 = to_vc4_dev(dev); 329 325 struct drm_plane *plane; 330 - struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); 326 + unsigned long flags; 331 327 u32 dlist_count = 0; 328 + int ret; 332 329 333 330 /* The pixelvalve can only feed one encoder (and encoders are 334 331 * 1:1 with connectors.) ··· 353 346 354 347 dlist_count++; /* Account for SCALER_CTL0_END. 
*/ 355 348 356 - if (!vc4_crtc->dlist || dlist_count > vc4_crtc->dlist_size) { 357 - vc4_crtc->dlist = ((u32 __iomem *)vc4->hvs->dlist + 358 - HVS_BOOTLOADER_DLIST_END); 359 - vc4_crtc->dlist_size = ((SCALER_DLIST_SIZE >> 2) - 360 - HVS_BOOTLOADER_DLIST_END); 361 - 362 - if (dlist_count > vc4_crtc->dlist_size) { 363 - DRM_DEBUG_KMS("dlist too large for CRTC (%d > %d).\n", 364 - dlist_count, vc4_crtc->dlist_size); 365 - return -EINVAL; 366 - } 367 - } 349 + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); 350 + ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm, 351 + dlist_count, 1, 0); 352 + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); 353 + if (ret) 354 + return ret; 368 355 369 356 return 0; 370 357 } ··· 369 368 struct drm_device *dev = crtc->dev; 370 369 struct vc4_dev *vc4 = to_vc4_dev(dev); 371 370 struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); 371 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); 372 372 struct drm_plane *plane; 373 373 bool debug_dump_regs = false; 374 - u32 __iomem *dlist_next = vc4_crtc->dlist; 374 + u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; 375 + u32 __iomem *dlist_next = dlist_start; 375 376 376 377 if (debug_dump_regs) { 377 378 DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc)); 378 379 vc4_hvs_dump_state(dev); 379 380 } 380 381 381 - /* Copy all the active planes' dlist contents to the hardware dlist. 382 - * 383 - * XXX: If the new display list was large enough that it 384 - * overlapped a currently-read display list, we need to do 385 - * something like disable scanout before putting in the new 386 - * list. For now, we're safe because we only have the two 387 - * planes. 388 - */ 382 + /* Copy all the active planes' dlist contents to the hardware dlist. 
*/ 389 383 drm_atomic_crtc_for_each_plane(plane, crtc) { 390 384 dlist_next += vc4_plane_write_dlist(plane, dlist_next); 391 385 } 392 386 393 - if (dlist_next == vc4_crtc->dlist) { 394 - /* If no planes were enabled, use the SCALER_CTL0_END 395 - * at the start of the display list memory (in the 396 - * bootloader section). We'll rewrite that 397 - * SCALER_CTL0_END, just in case, though. 398 - */ 399 - writel(SCALER_CTL0_END, vc4->hvs->dlist); 400 - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 0); 401 - } else { 402 - writel(SCALER_CTL0_END, dlist_next); 403 - dlist_next++; 387 + writel(SCALER_CTL0_END, dlist_next); 388 + dlist_next++; 404 389 405 - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 406 - (u32 __iomem *)vc4_crtc->dlist - 407 - (u32 __iomem *)vc4->hvs->dlist); 390 + WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); 408 391 409 - /* Make the next display list start after ours. */ 410 - vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist); 411 - vc4_crtc->dlist = dlist_next; 412 - } 392 + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 393 + vc4_state->mm.start); 413 394 414 395 if (debug_dump_regs) { 415 396 DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); ··· 556 573 return drm_atomic_helper_page_flip(crtc, fb, event, flags); 557 574 } 558 575 576 + static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) 577 + { 578 + struct vc4_crtc_state *vc4_state; 579 + 580 + vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 581 + if (!vc4_state) 582 + return NULL; 583 + 584 + __drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base); 585 + return &vc4_state->base; 586 + } 587 + 588 + static void vc4_crtc_destroy_state(struct drm_crtc *crtc, 589 + struct drm_crtc_state *state) 590 + { 591 + struct vc4_dev *vc4 = to_vc4_dev(crtc->dev); 592 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); 593 + 594 + if (vc4_state->mm.allocated) { 595 + unsigned long flags; 596 + 597 + 
spin_lock_irqsave(&vc4->hvs->mm_lock, flags); 598 + drm_mm_remove_node(&vc4_state->mm); 599 + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); 600 + 601 + } 602 + 603 + __drm_atomic_helper_crtc_destroy_state(crtc, state); 604 + } 605 + 559 606 static const struct drm_crtc_funcs vc4_crtc_funcs = { 560 607 .set_config = drm_atomic_helper_set_config, 561 608 .destroy = vc4_crtc_destroy, ··· 594 581 .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ 595 582 .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ 596 583 .reset = drm_atomic_helper_crtc_reset, 597 - .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, 598 - .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, 584 + .atomic_duplicate_state = vc4_crtc_duplicate_state, 585 + .atomic_destroy_state = vc4_crtc_destroy_state, 599 586 }; 600 587 601 588 static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { ··· 657 644 struct vc4_dev *vc4 = to_vc4_dev(drm); 658 645 struct vc4_crtc *vc4_crtc; 659 646 struct drm_crtc *crtc; 660 - struct drm_plane *primary_plane, *cursor_plane; 647 + struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp; 661 648 const struct of_device_id *match; 662 - int ret; 649 + int ret, i; 663 650 664 651 vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL); 665 652 if (!vc4_crtc) ··· 688 675 goto err; 689 676 } 690 677 691 - cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); 692 - if (IS_ERR(cursor_plane)) { 693 - dev_err(dev, "failed to construct cursor plane\n"); 694 - ret = PTR_ERR(cursor_plane); 695 - goto err_primary; 696 - } 697 - 698 - drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane, 678 + drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL, 699 679 &vc4_crtc_funcs, NULL); 700 680 drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs); 701 681 primary_plane->crtc = crtc; 702 - cursor_plane->crtc = crtc; 703 682 vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc; 704 683 vc4_crtc->channel = 
vc4_crtc->data->hvs_channel; 684 + 685 + /* Set up some arbitrary number of planes. We're not limited 686 + * by a set number of physical registers, just the space in 687 + * the HVS (16k) and how small an plane can be (28 bytes). 688 + * However, each plane we set up takes up some memory, and 689 + * increases the cost of looping over planes, which atomic 690 + * modesetting does quite a bit. As a result, we pick a 691 + * modest number of planes to expose, that should hopefully 692 + * still cover any sane usecase. 693 + */ 694 + for (i = 0; i < 8; i++) { 695 + struct drm_plane *plane = 696 + vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); 697 + 698 + if (IS_ERR(plane)) 699 + continue; 700 + 701 + plane->possible_crtcs = 1 << drm_crtc_index(crtc); 702 + } 703 + 704 + /* Set up the legacy cursor after overlay initialization, 705 + * since we overlay planes on the CRTC in the order they were 706 + * initialized. 707 + */ 708 + cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); 709 + if (!IS_ERR(cursor_plane)) { 710 + cursor_plane->possible_crtcs = 1 << drm_crtc_index(crtc); 711 + cursor_plane->crtc = crtc; 712 + crtc->cursor = cursor_plane; 713 + } 705 714 706 715 CRTC_WRITE(PV_INTEN, 0); 707 716 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); 708 717 ret = devm_request_irq(dev, platform_get_irq(pdev, 0), 709 718 vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc); 710 719 if (ret) 711 - goto err_cursor; 720 + goto err_destroy_planes; 712 721 713 722 vc4_set_crtc_possible_masks(drm, crtc); 714 723 ··· 738 703 739 704 return 0; 740 705 741 - err_cursor: 742 - cursor_plane->funcs->destroy(cursor_plane); 743 - err_primary: 744 - primary_plane->funcs->destroy(primary_plane); 706 + err_destroy_planes: 707 + list_for_each_entry_safe(destroy_plane, temp, 708 + &drm->mode_config.plane_list, head) { 709 + if (destroy_plane->possible_crtcs == 1 << drm_crtc_index(crtc)) 710 + destroy_plane->funcs->destroy(destroy_plane); 711 + } 745 712 err: 746 713 return ret; 747 714 }
+11 -1
drivers/gpu/drm/vc4/vc4_drv.h
··· 149 149 struct vc4_hvs { 150 150 struct platform_device *pdev; 151 151 void __iomem *regs; 152 - void __iomem *dlist; 152 + u32 __iomem *dlist; 153 + 154 + /* Memory manager for CRTCs to allocate space in the display 155 + * list. Units are dwords. 156 + */ 157 + struct drm_mm dlist_mm; 158 + /* Memory manager for the LBM memory used by HVS scaling. */ 159 + struct drm_mm lbm_mm; 160 + spinlock_t mm_lock; 161 + 162 + struct drm_mm_node mitchell_netravali_filter; 153 163 }; 154 164 155 165 struct vc4_plane {
+97
drivers/gpu/drm/vc4/vc4_hvs.c
··· 100 100 } 101 101 #endif 102 102 103 + /* The filter kernel is composed of dwords each containing 3 9-bit 104 + * signed integers packed next to each other. 105 + */ 106 + #define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff) 107 + #define VC4_PPF_FILTER_WORD(c0, c1, c2) \ 108 + ((((c0) & 0x1ff) << 0) | \ 109 + (((c1) & 0x1ff) << 9) | \ 110 + (((c2) & 0x1ff) << 18)) 111 + 112 + /* The whole filter kernel is arranged as the coefficients 0-16 going 113 + * up, then a pad, then 17-31 going down and reversed within the 114 + * dwords. This means that a linear phase kernel (where it's 115 + * symmetrical at the boundary between 15 and 16) has the last 5 116 + * dwords matching the first 5, but reversed. 117 + */ 118 + #define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \ 119 + c9, c10, c11, c12, c13, c14, c15) \ 120 + {VC4_PPF_FILTER_WORD(c0, c1, c2), \ 121 + VC4_PPF_FILTER_WORD(c3, c4, c5), \ 122 + VC4_PPF_FILTER_WORD(c6, c7, c8), \ 123 + VC4_PPF_FILTER_WORD(c9, c10, c11), \ 124 + VC4_PPF_FILTER_WORD(c12, c13, c14), \ 125 + VC4_PPF_FILTER_WORD(c15, c15, 0)} 126 + 127 + #define VC4_LINEAR_PHASE_KERNEL_DWORDS 6 128 + #define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1) 129 + 130 + /* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali. 
131 + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf 132 + */ 133 + static const u32 mitchell_netravali_1_3_1_3_kernel[] = 134 + VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18, 135 + 50, 82, 119, 155, 187, 213, 227); 136 + 137 + static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, 138 + struct drm_mm_node *space, 139 + const u32 *kernel) 140 + { 141 + int ret, i; 142 + u32 __iomem *dst_kernel; 143 + 144 + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1, 145 + 0); 146 + if (ret) { 147 + DRM_ERROR("Failed to allocate space for filter kernel: %d\n", 148 + ret); 149 + return ret; 150 + } 151 + 152 + dst_kernel = hvs->dlist + space->start; 153 + 154 + for (i = 0; i < VC4_KERNEL_DWORDS; i++) { 155 + if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) 156 + writel(kernel[i], &dst_kernel[i]); 157 + else { 158 + writel(kernel[VC4_KERNEL_DWORDS - i - 1], 159 + &dst_kernel[i]); 160 + } 161 + } 162 + 163 + return 0; 164 + } 165 + 103 166 static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) 104 167 { 105 168 struct platform_device *pdev = to_platform_device(dev); 106 169 struct drm_device *drm = dev_get_drvdata(master); 107 170 struct vc4_dev *vc4 = drm->dev_private; 108 171 struct vc4_hvs *hvs = NULL; 172 + int ret; 109 173 110 174 hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL); 111 175 if (!hvs) ··· 183 119 184 120 hvs->dlist = hvs->regs + SCALER_DLIST_START; 185 121 122 + spin_lock_init(&hvs->mm_lock); 123 + 124 + /* Set up the HVS display list memory manager. We never 125 + * overwrite the setup from the bootloader (just 128b out of 126 + * our 16K), since we don't want to scramble the screen when 127 + * transitioning from the firmware's boot setup to runtime. 128 + */ 129 + drm_mm_init(&hvs->dlist_mm, 130 + HVS_BOOTLOADER_DLIST_END, 131 + (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); 132 + 133 + /* Set up the HVS LBM memory manager. 
We could have some more 134 + * complicated data structure that allowed reuse of LBM areas 135 + * between planes when they don't overlap on the screen, but 136 + * for now we just allocate globally. 137 + */ 138 + drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); 139 + 140 + /* Upload filter kernels. We only have the one for now, so we 141 + * keep it around for the lifetime of the driver. 142 + */ 143 + ret = vc4_hvs_upload_linear_kernel(hvs, 144 + &hvs->mitchell_netravali_filter, 145 + mitchell_netravali_1_3_1_3_kernel); 146 + if (ret) 147 + return ret; 148 + 186 149 vc4->hvs = hvs; 187 150 return 0; 188 151 } ··· 219 128 { 220 129 struct drm_device *drm = dev_get_drvdata(master); 221 130 struct vc4_dev *vc4 = drm->dev_private; 131 + 132 + if (vc4->hvs->mitchell_netravali_filter.allocated) 133 + drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter); 134 + 135 + drm_mm_takedown(&vc4->hvs->dlist_mm); 136 + drm_mm_takedown(&vc4->hvs->lbm_mm); 222 137 223 138 vc4->hvs = NULL; 224 139 }
+9
drivers/gpu/drm/vc4/vc4_kms.c
··· 49 49 50 50 drm_atomic_helper_commit_modeset_enables(dev, state); 51 51 52 + /* Make sure that drm_atomic_helper_wait_for_vblanks() 53 + * actually waits for vblank. If we're doing a full atomic 54 + * modeset (as opposed to a vc4_update_plane() short circuit), 55 + * then we need to wait for scanout to be done with our 56 + * display lists before we free it and potentially reallocate 57 + * and overwrite the dlist memory with a new modeset. 58 + */ 59 + state->legacy_cursor_update = false; 60 + 52 61 drm_atomic_helper_wait_for_vblanks(dev, state); 53 62 54 63 drm_atomic_helper_cleanup_planes(dev, state);
+549 -52
drivers/gpu/drm/vc4/vc4_plane.c
··· 24 24 #include "drm_fb_cma_helper.h" 25 25 #include "drm_plane_helper.h" 26 26 27 + enum vc4_scaling_mode { 28 + VC4_SCALING_NONE, 29 + VC4_SCALING_TPZ, 30 + VC4_SCALING_PPF, 31 + }; 32 + 27 33 struct vc4_plane_state { 28 34 struct drm_plane_state base; 35 + /* System memory copy of the display list for this element, computed 36 + * at atomic_check time. 37 + */ 29 38 u32 *dlist; 30 - u32 dlist_size; /* Number of dwords in allocated for the display list */ 39 + u32 dlist_size; /* Number of dwords allocated for the display list */ 31 40 u32 dlist_count; /* Number of used dwords in the display list. */ 32 41 33 - /* Offset in the dlist to pointer word 0. */ 34 - u32 pw0_offset; 42 + /* Offset in the dlist to various words, for pageflip or 43 + * cursor updates. 44 + */ 45 + u32 pos0_offset; 46 + u32 pos2_offset; 47 + u32 ptr0_offset; 35 48 36 49 /* Offset where the plane's dlist was last stored in the 37 - hardware at vc4_crtc_atomic_flush() time. 38 - */ 39 - u32 *hw_dlist; 50 + * hardware at vc4_crtc_atomic_flush() time. 51 + */ 52 + u32 __iomem *hw_dlist; 53 + 54 + /* Clipped coordinates of the plane on the display. */ 55 + int crtc_x, crtc_y, crtc_w, crtc_h; 56 + /* Clipped area being scanned from in the FB. */ 57 + u32 src_x, src_y; 58 + 59 + u32 src_w[2], src_h[2]; 60 + 61 + /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ 62 + enum vc4_scaling_mode x_scaling[2], y_scaling[2]; 63 + bool is_unity; 64 + bool is_yuv; 65 + 66 + /* Offset to start scanning out from the start of the plane's 67 + * BO. 68 + */ 69 + u32 offsets[3]; 70 + 71 + /* Our allocation in LBM for temporary storage during scaling. 
*/ 72 + struct drm_mm_node lbm; 40 73 }; 41 74 42 75 static inline struct vc4_plane_state * ··· 83 50 u32 hvs; /* HVS_FORMAT_* */ 84 51 u32 pixel_order; 85 52 bool has_alpha; 53 + bool flip_cbcr; 86 54 } hvs_formats[] = { 87 55 { 88 56 .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, ··· 92 58 { 93 59 .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, 94 60 .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, 61 + }, 62 + { 63 + .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, 64 + .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, 65 + }, 66 + { 67 + .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565, 68 + .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, 69 + }, 70 + { 71 + .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, 72 + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, 73 + }, 74 + { 75 + .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, 76 + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, 77 + }, 78 + { 79 + .drm = DRM_FORMAT_YUV422, 80 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 81 + }, 82 + { 83 + .drm = DRM_FORMAT_YVU422, 84 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 85 + .flip_cbcr = true, 86 + }, 87 + { 88 + .drm = DRM_FORMAT_YUV420, 89 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 90 + }, 91 + { 92 + .drm = DRM_FORMAT_YVU420, 93 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 94 + .flip_cbcr = true, 95 + }, 96 + { 97 + .drm = DRM_FORMAT_NV12, 98 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 99 + }, 100 + { 101 + .drm = DRM_FORMAT_NV16, 102 + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 95 103 }, 96 104 }; 97 105 ··· 147 71 } 148 72 149 73 return NULL; 74 + } 75 + 76 + static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 77 + { 78 + if (dst > src) 79 + return VC4_SCALING_PPF; 80 + else if (dst < src) 81 + return VC4_SCALING_TPZ; 82 + else 83 + return VC4_SCALING_NONE; 150 84 } 151 85 152 86 static bool plane_enabled(struct 
drm_plane_state *state) ··· 174 88 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 175 89 if (!vc4_state) 176 90 return NULL; 91 + 92 + memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 177 93 178 94 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 179 95 ··· 196 108 static void vc4_plane_destroy_state(struct drm_plane *plane, 197 109 struct drm_plane_state *state) 198 110 { 111 + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 199 112 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 113 + 114 + if (vc4_state->lbm.allocated) { 115 + unsigned long irqflags; 116 + 117 + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 118 + drm_mm_remove_node(&vc4_state->lbm); 119 + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 120 + } 200 121 201 122 kfree(vc4_state->dlist); 202 123 __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base); ··· 245 148 vc4_state->dlist[vc4_state->dlist_count++] = val; 246 149 } 247 150 151 + /* Returns the scl0/scl1 field based on whether the dimensions need to 152 + * be up/down/non-scaled. 153 + * 154 + * This is a replication of a table from the spec. 
155 + */ 156 + static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 157 + { 158 + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 159 + 160 + switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 161 + case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 162 + return SCALER_CTL0_SCL_H_PPF_V_PPF; 163 + case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 164 + return SCALER_CTL0_SCL_H_TPZ_V_PPF; 165 + case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 166 + return SCALER_CTL0_SCL_H_PPF_V_TPZ; 167 + case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 168 + return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 169 + case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 170 + return SCALER_CTL0_SCL_H_PPF_V_NONE; 171 + case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 172 + return SCALER_CTL0_SCL_H_NONE_V_PPF; 173 + case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 174 + return SCALER_CTL0_SCL_H_NONE_V_TPZ; 175 + case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 176 + return SCALER_CTL0_SCL_H_TPZ_V_NONE; 177 + default: 178 + case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 179 + /* The unity case is independently handled by 180 + * SCALER_CTL0_UNITY. 181 + */ 182 + return 0; 183 + } 184 + } 185 + 186 + static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 187 + { 188 + struct drm_plane *plane = state->plane; 189 + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 190 + struct drm_framebuffer *fb = state->fb; 191 + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 192 + u32 subpixel_src_mask = (1 << 16) - 1; 193 + u32 format = fb->pixel_format; 194 + int num_planes = drm_format_num_planes(format); 195 + u32 h_subsample = 1; 196 + u32 v_subsample = 1; 197 + int i; 198 + 199 + for (i = 0; i < num_planes; i++) 200 + vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; 201 + 202 + /* We don't support subpixel source positioning for scaling. 
*/ 203 + if ((state->src_x & subpixel_src_mask) || 204 + (state->src_y & subpixel_src_mask) || 205 + (state->src_w & subpixel_src_mask) || 206 + (state->src_h & subpixel_src_mask)) { 207 + return -EINVAL; 208 + } 209 + 210 + vc4_state->src_x = state->src_x >> 16; 211 + vc4_state->src_y = state->src_y >> 16; 212 + vc4_state->src_w[0] = state->src_w >> 16; 213 + vc4_state->src_h[0] = state->src_h >> 16; 214 + 215 + vc4_state->crtc_x = state->crtc_x; 216 + vc4_state->crtc_y = state->crtc_y; 217 + vc4_state->crtc_w = state->crtc_w; 218 + vc4_state->crtc_h = state->crtc_h; 219 + 220 + vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 221 + vc4_state->crtc_w); 222 + vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 223 + vc4_state->crtc_h); 224 + 225 + if (num_planes > 1) { 226 + vc4_state->is_yuv = true; 227 + 228 + h_subsample = drm_format_horz_chroma_subsampling(format); 229 + v_subsample = drm_format_vert_chroma_subsampling(format); 230 + vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 231 + vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 232 + 233 + vc4_state->x_scaling[1] = 234 + vc4_get_scaling_mode(vc4_state->src_w[1], 235 + vc4_state->crtc_w); 236 + vc4_state->y_scaling[1] = 237 + vc4_get_scaling_mode(vc4_state->src_h[1], 238 + vc4_state->crtc_h); 239 + 240 + /* YUV conversion requires that scaling be enabled, 241 + * even on a plane that's otherwise 1:1. Choose TPZ 242 + * for simplicity. 
243 + */ 244 + if (vc4_state->x_scaling[0] == VC4_SCALING_NONE) 245 + vc4_state->x_scaling[0] = VC4_SCALING_TPZ; 246 + if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) 247 + vc4_state->y_scaling[0] = VC4_SCALING_TPZ; 248 + } 249 + 250 + vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 251 + vc4_state->y_scaling[0] == VC4_SCALING_NONE && 252 + vc4_state->x_scaling[1] == VC4_SCALING_NONE && 253 + vc4_state->y_scaling[1] == VC4_SCALING_NONE); 254 + 255 + /* No configuring scaling on the cursor plane, since it gets 256 + non-vblank-synced updates, and scaling requires requires 257 + LBM changes which have to be vblank-synced. 258 + */ 259 + if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity) 260 + return -EINVAL; 261 + 262 + /* Clamp the on-screen start x/y to 0. The hardware doesn't 263 + * support negative y, and negative x wastes bandwidth. 264 + */ 265 + if (vc4_state->crtc_x < 0) { 266 + for (i = 0; i < num_planes; i++) { 267 + u32 cpp = drm_format_plane_cpp(fb->pixel_format, i); 268 + u32 subs = ((i == 0) ? 1 : h_subsample); 269 + 270 + vc4_state->offsets[i] += (cpp * 271 + (-vc4_state->crtc_x) / subs); 272 + } 273 + vc4_state->src_w[0] += vc4_state->crtc_x; 274 + vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample; 275 + vc4_state->crtc_x = 0; 276 + } 277 + 278 + if (vc4_state->crtc_y < 0) { 279 + for (i = 0; i < num_planes; i++) { 280 + u32 subs = ((i == 0) ? 
1 : v_subsample); 281 + 282 + vc4_state->offsets[i] += (fb->pitches[i] * 283 + (-vc4_state->crtc_y) / subs); 284 + } 285 + vc4_state->src_h[0] += vc4_state->crtc_y; 286 + vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample; 287 + vc4_state->crtc_y = 0; 288 + } 289 + 290 + return 0; 291 + } 292 + 293 + static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 294 + { 295 + u32 scale, recip; 296 + 297 + scale = (1 << 16) * src / dst; 298 + 299 + /* The specs note that while the reciprocal would be defined 300 + * as (1<<32)/scale, ~0 is close enough. 301 + */ 302 + recip = ~0 / scale; 303 + 304 + vc4_dlist_write(vc4_state, 305 + VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 306 + VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 307 + vc4_dlist_write(vc4_state, 308 + VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 309 + } 310 + 311 + static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 312 + { 313 + u32 scale = (1 << 16) * src / dst; 314 + 315 + vc4_dlist_write(vc4_state, 316 + SCALER_PPF_AGC | 317 + VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 318 + VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); 319 + } 320 + 321 + static u32 vc4_lbm_size(struct drm_plane_state *state) 322 + { 323 + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 324 + /* This is the worst case number. One of the two sizes will 325 + * be used depending on the scaling configuration. 326 + */ 327 + u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w); 328 + u32 lbm; 329 + 330 + if (!vc4_state->is_yuv) { 331 + if (vc4_state->is_unity) 332 + return 0; 333 + else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 334 + lbm = pix_per_line * 8; 335 + else { 336 + /* In special cases, this multiplier might be 12. */ 337 + lbm = pix_per_line * 16; 338 + } 339 + } else { 340 + /* There are cases for this going down to a multiplier 341 + * of 2, but according to the firmware source, the 342 + * table in the docs is somewhat wrong. 
343 + */ 344 + lbm = pix_per_line * 16; 345 + } 346 + 347 + lbm = roundup(lbm, 32); 348 + 349 + return lbm; 350 + } 351 + 352 + static void vc4_write_scaling_parameters(struct drm_plane_state *state, 353 + int channel) 354 + { 355 + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 356 + 357 + /* Ch0 H-PPF Word 0: Scaling Parameters */ 358 + if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 359 + vc4_write_ppf(vc4_state, 360 + vc4_state->src_w[channel], vc4_state->crtc_w); 361 + } 362 + 363 + /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 364 + if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 365 + vc4_write_ppf(vc4_state, 366 + vc4_state->src_h[channel], vc4_state->crtc_h); 367 + vc4_dlist_write(vc4_state, 0xc0c0c0c0); 368 + } 369 + 370 + /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 371 + if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 372 + vc4_write_tpz(vc4_state, 373 + vc4_state->src_w[channel], vc4_state->crtc_w); 374 + } 375 + 376 + /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 377 + if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 378 + vc4_write_tpz(vc4_state, 379 + vc4_state->src_h[channel], vc4_state->crtc_h); 380 + vc4_dlist_write(vc4_state, 0xc0c0c0c0); 381 + } 382 + } 383 + 248 384 /* Writes out a full display list for an active plane to the plane's 249 385 * private dlist state. 
250 386 */ 251 387 static int vc4_plane_mode_set(struct drm_plane *plane, 252 388 struct drm_plane_state *state) 253 389 { 390 + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 254 391 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 255 392 struct drm_framebuffer *fb = state->fb; 256 - struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 257 393 u32 ctl0_offset = vc4_state->dlist_count; 258 394 const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); 259 - uint32_t offset = fb->offsets[0]; 260 - int crtc_x = state->crtc_x; 261 - int crtc_y = state->crtc_y; 262 - int crtc_w = state->crtc_w; 263 - int crtc_h = state->crtc_h; 395 + int num_planes = drm_format_num_planes(format->drm); 396 + u32 scl0, scl1; 397 + u32 lbm_size; 398 + unsigned long irqflags; 399 + int ret, i; 264 400 265 - if (state->crtc_w << 16 != state->src_w || 266 - state->crtc_h << 16 != state->src_h) { 267 - /* We don't support scaling yet, which involves 268 - * allocating the LBM memory for scaling temporary 269 - * storage, and putting filter kernels in the HVS 270 - * context. 271 - */ 272 - return -EINVAL; 401 + ret = vc4_plane_setup_clipping_and_scaling(state); 402 + if (ret) 403 + return ret; 404 + 405 + /* Allocate the LBM memory that the HVS will use for temporary 406 + * storage due to our scaling/format conversion. 407 + */ 408 + lbm_size = vc4_lbm_size(state); 409 + if (lbm_size) { 410 + if (!vc4_state->lbm.allocated) { 411 + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 412 + ret = drm_mm_insert_node(&vc4->hvs->lbm_mm, 413 + &vc4_state->lbm, 414 + lbm_size, 32, 0); 415 + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 416 + } else { 417 + WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 418 + } 273 419 } 274 420 275 - if (crtc_x < 0) { 276 - offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x; 277 - crtc_w += crtc_x; 278 - crtc_x = 0; 421 + if (ret) 422 + return ret; 423 + 424 + /* SCL1 is used for Cb/Cr scaling of planar formats. 
For RGB 425 + * and 4:4:4, scl1 should be set to scl0 so both channels of 426 + * the scaler do the same thing. For YUV, the Y plane needs 427 + * to be put in channel 1 and Cb/Cr in channel 0, so we swap 428 + * the scl fields here. 429 + */ 430 + if (num_planes == 1) { 431 + scl0 = vc4_get_scl_field(state, 1); 432 + scl1 = scl0; 433 + } else { 434 + scl0 = vc4_get_scl_field(state, 1); 435 + scl1 = vc4_get_scl_field(state, 0); 279 436 } 280 437 281 - if (crtc_y < 0) { 282 - offset += fb->pitches[0] * -crtc_y; 283 - crtc_h += crtc_y; 284 - crtc_y = 0; 285 - } 286 - 438 + /* Control word */ 287 439 vc4_dlist_write(vc4_state, 288 440 SCALER_CTL0_VALID | 289 441 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 290 442 (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 291 - SCALER_CTL0_UNITY); 443 + (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 444 + VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 445 + VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 292 446 293 447 /* Position Word 0: Image Positions and Alpha Value */ 448 + vc4_state->pos0_offset = vc4_state->dlist_count; 294 449 vc4_dlist_write(vc4_state, 295 450 VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | 296 - VC4_SET_FIELD(crtc_x, SCALER_POS0_START_X) | 297 - VC4_SET_FIELD(crtc_y, SCALER_POS0_START_Y)); 451 + VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 452 + VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 298 453 299 - /* Position Word 1: Scaled Image Dimensions. 300 - * Skipped due to SCALER_CTL0_UNITY scaling. 301 - */ 454 + /* Position Word 1: Scaled Image Dimensions. */ 455 + if (!vc4_state->is_unity) { 456 + vc4_dlist_write(vc4_state, 457 + VC4_SET_FIELD(vc4_state->crtc_w, 458 + SCALER_POS1_SCL_WIDTH) | 459 + VC4_SET_FIELD(vc4_state->crtc_h, 460 + SCALER_POS1_SCL_HEIGHT)); 461 + } 302 462 303 463 /* Position Word 2: Source Image Size, Alpha Mode */ 464 + vc4_state->pos2_offset = vc4_state->dlist_count; 304 465 vc4_dlist_write(vc4_state, 305 466 VC4_SET_FIELD(format->has_alpha ? 
306 467 SCALER_POS2_ALPHA_MODE_PIPELINE : 307 468 SCALER_POS2_ALPHA_MODE_FIXED, 308 469 SCALER_POS2_ALPHA_MODE) | 309 - VC4_SET_FIELD(crtc_w, SCALER_POS2_WIDTH) | 310 - VC4_SET_FIELD(crtc_h, SCALER_POS2_HEIGHT)); 470 + VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | 471 + VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); 311 472 312 473 /* Position Word 3: Context. Written by the HVS. */ 313 474 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 314 475 315 - vc4_state->pw0_offset = vc4_state->dlist_count; 316 476 317 - /* Pointer Word 0: RGB / Y Pointer */ 318 - vc4_dlist_write(vc4_state, bo->paddr + offset); 477 + /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 478 + * 479 + * The pointers may be any byte address. 480 + */ 481 + vc4_state->ptr0_offset = vc4_state->dlist_count; 482 + if (!format->flip_cbcr) { 483 + for (i = 0; i < num_planes; i++) 484 + vc4_dlist_write(vc4_state, vc4_state->offsets[i]); 485 + } else { 486 + WARN_ON_ONCE(num_planes != 3); 487 + vc4_dlist_write(vc4_state, vc4_state->offsets[0]); 488 + vc4_dlist_write(vc4_state, vc4_state->offsets[2]); 489 + vc4_dlist_write(vc4_state, vc4_state->offsets[1]); 490 + } 319 491 320 - /* Pointer Context Word 0: Written by the HVS */ 321 - vc4_dlist_write(vc4_state, 0xc0c0c0c0); 492 + /* Pointer Context Word 0/1/2: Written by the HVS */ 493 + for (i = 0; i < num_planes; i++) 494 + vc4_dlist_write(vc4_state, 0xc0c0c0c0); 322 495 323 - /* Pitch word 0: Pointer 0 Pitch */ 324 - vc4_dlist_write(vc4_state, 325 - VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH)); 496 + /* Pitch word 0/1/2 */ 497 + for (i = 0; i < num_planes; i++) { 498 + vc4_dlist_write(vc4_state, 499 + VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); 500 + } 501 + 502 + /* Colorspace conversion words */ 503 + if (vc4_state->is_yuv) { 504 + vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); 505 + vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5); 506 + vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); 507 + } 508 + 509 + if 
(!vc4_state->is_unity) { 510 + /* LBM Base Address. */ 511 + if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 512 + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 513 + vc4_dlist_write(vc4_state, vc4_state->lbm.start); 514 + } 515 + 516 + if (num_planes > 1) { 517 + /* Emit Cb/Cr as channel 0 and Y as channel 518 + * 1. This matches how we set up scl0/scl1 519 + * above. 520 + */ 521 + vc4_write_scaling_parameters(state, 1); 522 + } 523 + vc4_write_scaling_parameters(state, 0); 524 + 525 + /* If any PPF setup was done, then all the kernel 526 + * pointers get uploaded. 527 + */ 528 + if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 529 + vc4_state->y_scaling[0] == VC4_SCALING_PPF || 530 + vc4_state->x_scaling[1] == VC4_SCALING_PPF || 531 + vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 532 + u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 533 + SCALER_PPF_KERNEL_OFFSET); 534 + 535 + /* HPPF plane 0 */ 536 + vc4_dlist_write(vc4_state, kernel); 537 + /* VPPF plane 0 */ 538 + vc4_dlist_write(vc4_state, kernel); 539 + /* HPPF plane 1 */ 540 + vc4_dlist_write(vc4_state, kernel); 541 + /* VPPF plane 1 */ 542 + vc4_dlist_write(vc4_state, kernel); 543 + } 544 + } 326 545 327 546 vc4_state->dlist[ctl0_offset] |= 328 547 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); ··· 716 303 * scanout will start from this address as soon as the FIFO 717 304 * needs to refill with pixels. 718 305 */ 719 - writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); 306 + writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 720 307 721 308 /* Also update the CPU-side dlist copy, so that any later 722 309 * atomic updates that don't do a new modeset on our plane 723 310 * also use our updated address. 
724 311 */ 725 - vc4_state->dlist[vc4_state->pw0_offset] = addr; 312 + vc4_state->dlist[vc4_state->ptr0_offset] = addr; 726 313 } 727 314 728 315 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { ··· 738 325 drm_plane_cleanup(plane); 739 326 } 740 327 328 + /* Implements immediate (non-vblank-synced) updates of the cursor 329 + * position, or falls back to the atomic helper otherwise. 330 + */ 331 + static int 332 + vc4_update_plane(struct drm_plane *plane, 333 + struct drm_crtc *crtc, 334 + struct drm_framebuffer *fb, 335 + int crtc_x, int crtc_y, 336 + unsigned int crtc_w, unsigned int crtc_h, 337 + uint32_t src_x, uint32_t src_y, 338 + uint32_t src_w, uint32_t src_h) 339 + { 340 + struct drm_plane_state *plane_state; 341 + struct vc4_plane_state *vc4_state; 342 + 343 + if (plane != crtc->cursor) 344 + goto out; 345 + 346 + plane_state = plane->state; 347 + vc4_state = to_vc4_plane_state(plane_state); 348 + 349 + if (!plane_state) 350 + goto out; 351 + 352 + /* If we're changing the cursor contents, do that in the 353 + * normal vblank-synced atomic path. 354 + */ 355 + if (fb != plane_state->fb) 356 + goto out; 357 + 358 + /* No configuring new scaling in the fast path. */ 359 + if (crtc_w != plane_state->crtc_w || 360 + crtc_h != plane_state->crtc_h || 361 + src_w != plane_state->src_w || 362 + src_h != plane_state->src_h) { 363 + goto out; 364 + } 365 + 366 + /* Set the cursor's position on the screen. This is the 367 + * expected change from the drm_mode_cursor_universal() 368 + * helper. 369 + */ 370 + plane_state->crtc_x = crtc_x; 371 + plane_state->crtc_y = crtc_y; 372 + 373 + /* Allow changing the start position within the cursor BO, if 374 + * that matters. 375 + */ 376 + plane_state->src_x = src_x; 377 + plane_state->src_y = src_y; 378 + 379 + /* Update the display list based on the new crtc_x/y. 
*/ 380 + vc4_plane_atomic_check(plane, plane_state); 381 + 382 + /* Note that we can't just call vc4_plane_write_dlist() 383 + * because that would smash the context data that the HVS is 384 + * currently using. 385 + */ 386 + writel(vc4_state->dlist[vc4_state->pos0_offset], 387 + &vc4_state->hw_dlist[vc4_state->pos0_offset]); 388 + writel(vc4_state->dlist[vc4_state->pos2_offset], 389 + &vc4_state->hw_dlist[vc4_state->pos2_offset]); 390 + writel(vc4_state->dlist[vc4_state->ptr0_offset], 391 + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 392 + 393 + return 0; 394 + 395 + out: 396 + return drm_atomic_helper_update_plane(plane, crtc, fb, 397 + crtc_x, crtc_y, 398 + crtc_w, crtc_h, 399 + src_x, src_y, 400 + src_w, src_h); 401 + } 402 + 741 403 static const struct drm_plane_funcs vc4_plane_funcs = { 742 - .update_plane = drm_atomic_helper_update_plane, 404 + .update_plane = vc4_update_plane, 743 405 .disable_plane = drm_atomic_helper_disable_plane, 744 406 .destroy = vc4_plane_destroy, 745 407 .set_property = NULL, ··· 829 341 struct drm_plane *plane = NULL; 830 342 struct vc4_plane *vc4_plane; 831 343 u32 formats[ARRAY_SIZE(hvs_formats)]; 344 + u32 num_formats = 0; 832 345 int ret = 0; 833 346 unsigned i; 834 347 ··· 840 351 goto fail; 841 352 } 842 353 843 - for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) 844 - formats[i] = hvs_formats[i].drm; 354 + for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 355 + /* Don't allow YUV in cursor planes, since that means 356 + * tuning on the scaler, which we don't allow for the 357 + * cursor. 
358 + */ 359 + if (type != DRM_PLANE_TYPE_CURSOR || 360 + hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) { 361 + formats[num_formats++] = hvs_formats[i].drm; 362 + } 363 + } 845 364 plane = &vc4_plane->base; 846 365 ret = drm_universal_plane_init(dev, plane, 0xff, 847 366 &vc4_plane_funcs, 848 - formats, ARRAY_SIZE(formats), 367 + formats, num_formats, 849 368 type, NULL); 850 369 851 370 drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
+101 -1
drivers/gpu/drm/vc4/vc4_regs.h
··· 503 503 HVS_PIXEL_FORMAT_RGB888 = 5, 504 504 HVS_PIXEL_FORMAT_RGBA6666 = 6, 505 505 /* 32bpp */ 506 - HVS_PIXEL_FORMAT_RGBA8888 = 7 506 + HVS_PIXEL_FORMAT_RGBA8888 = 7, 507 + 508 + HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8, 509 + HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, 510 + HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, 511 + HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, 507 512 }; 508 513 509 514 /* Note: the LSB is the rightmost character shown. Only valid for ··· 541 536 #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) 542 537 #define SCALER_CTL0_ORDER_SHIFT 13 543 538 539 + #define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) 540 + #define SCALER_CTL0_SCL1_SHIFT 8 541 + 542 + #define SCALER_CTL0_SCL0_MASK VC4_MASK(7, 5) 543 + #define SCALER_CTL0_SCL0_SHIFT 5 544 + 545 + #define SCALER_CTL0_SCL_H_PPF_V_PPF 0 546 + #define SCALER_CTL0_SCL_H_TPZ_V_PPF 1 547 + #define SCALER_CTL0_SCL_H_PPF_V_TPZ 2 548 + #define SCALER_CTL0_SCL_H_TPZ_V_TPZ 3 549 + #define SCALER_CTL0_SCL_H_PPF_V_NONE 4 550 + #define SCALER_CTL0_SCL_H_NONE_V_PPF 5 551 + #define SCALER_CTL0_SCL_H_NONE_V_TPZ 6 552 + #define SCALER_CTL0_SCL_H_TPZ_V_NONE 7 553 + 544 554 /* Set to indicate no scaling. */ 545 555 #define SCALER_CTL0_UNITY BIT(4) 546 556 ··· 571 551 #define SCALER_POS0_START_X_MASK VC4_MASK(11, 0) 572 552 #define SCALER_POS0_START_X_SHIFT 0 573 553 554 + #define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16) 555 + #define SCALER_POS1_SCL_HEIGHT_SHIFT 16 556 + 557 + #define SCALER_POS1_SCL_WIDTH_MASK VC4_MASK(11, 0) 558 + #define SCALER_POS1_SCL_WIDTH_SHIFT 0 559 + 574 560 #define SCALER_POS2_ALPHA_MODE_MASK VC4_MASK(31, 30) 575 561 #define SCALER_POS2_ALPHA_MODE_SHIFT 30 576 562 #define SCALER_POS2_ALPHA_MODE_PIPELINE 0 ··· 589 563 590 564 #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0) 591 565 #define SCALER_POS2_WIDTH_SHIFT 0 566 + 567 + /* Color Space Conversion words. 
Some values are S2.8 signed 568 + * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1, 569 + * 0x2: 2, 0x3: -1} 570 + */ 571 + /* bottom 8 bits of S2.8 contribution of Cr to Blue */ 572 + #define SCALER_CSC0_COEF_CR_BLU_MASK VC4_MASK(31, 24) 573 + #define SCALER_CSC0_COEF_CR_BLU_SHIFT 24 574 + /* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */ 575 + #define SCALER_CSC0_COEF_YY_OFS_MASK VC4_MASK(23, 16) 576 + #define SCALER_CSC0_COEF_YY_OFS_SHIFT 16 577 + /* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */ 578 + #define SCALER_CSC0_COEF_CB_OFS_MASK VC4_MASK(15, 8) 579 + #define SCALER_CSC0_COEF_CB_OFS_SHIFT 8 580 + /* Signed offset to apply to CB before CSC (Cr' = Cr - 128 + CR_OFS). */ 581 + #define SCALER_CSC0_COEF_CR_OFS_MASK VC4_MASK(7, 0) 582 + #define SCALER_CSC0_COEF_CR_OFS_SHIFT 0 583 + #define SCALER_CSC0_ITR_R_601_5 0x00f00000 584 + #define SCALER_CSC0_ITR_R_709_3 0x00f00000 585 + #define SCALER_CSC0_JPEG_JFIF 0x00000000 586 + 587 + /* S2.8 contribution of Cb to Green */ 588 + #define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22) 589 + #define SCALER_CSC1_COEF_CB_GRN_SHIFT 22 590 + /* S2.8 contribution of Cr to Green */ 591 + #define SCALER_CSC1_COEF_CR_GRN_MASK VC4_MASK(21, 12) 592 + #define SCALER_CSC1_COEF_CR_GRN_SHIFT 12 593 + /* S2.8 contribution of Y to all of RGB */ 594 + #define SCALER_CSC1_COEF_YY_ALL_MASK VC4_MASK(11, 2) 595 + #define SCALER_CSC1_COEF_YY_ALL_SHIFT 2 596 + /* top 2 bits of S2.8 contribution of Cr to Blue */ 597 + #define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0) 598 + #define SCALER_CSC1_COEF_CR_BLU_SHIFT 0 599 + #define SCALER_CSC1_ITR_R_601_5 0xe73304a8 600 + #define SCALER_CSC1_ITR_R_709_3 0xf2b784a8 601 + #define SCALER_CSC1_JPEG_JFIF 0xea34a400 602 + 603 + /* S2.8 contribution of Cb to Red */ 604 + #define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20) 605 + #define SCALER_CSC2_COEF_CB_RED_SHIFT 20 606 + /* S2.8 contribution of Cr to Red */ 607 + #define 
SCALER_CSC2_COEF_CR_RED_MASK VC4_MASK(19, 10) 608 + #define SCALER_CSC2_COEF_CR_RED_SHIFT 10 609 + /* S2.8 contribution of Cb to Blue */ 610 + #define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(19, 10) 611 + #define SCALER_CSC2_COEF_CB_BLU_SHIFT 10 612 + #define SCALER_CSC2_ITR_R_601_5 0x00066204 613 + #define SCALER_CSC2_ITR_R_709_3 0x00072a1c 614 + #define SCALER_CSC2_JPEG_JFIF 0x000599c5 615 + 616 + #define SCALER_TPZ0_VERT_RECALC BIT(31) 617 + #define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) 618 + #define SCALER_TPZ0_SCALE_SHIFT 8 619 + #define SCALER_TPZ0_IPHASE_MASK VC4_MASK(7, 0) 620 + #define SCALER_TPZ0_IPHASE_SHIFT 0 621 + #define SCALER_TPZ1_RECIP_MASK VC4_MASK(15, 0) 622 + #define SCALER_TPZ1_RECIP_SHIFT 0 623 + 624 + /* Skips interpolating coefficients to 64 phases, so just 8 are used. 625 + * Required for nearest neighbor. 626 + */ 627 + #define SCALER_PPF_NOINTERP BIT(31) 628 + /* Replaes the highest valued coefficient with one that makes all 4 629 + * sum to unity. 630 + */ 631 + #define SCALER_PPF_AGC BIT(30) 632 + #define SCALER_PPF_SCALE_MASK VC4_MASK(24, 8) 633 + #define SCALER_PPF_SCALE_SHIFT 8 634 + #define SCALER_PPF_IPHASE_MASK VC4_MASK(6, 0) 635 + #define SCALER_PPF_IPHASE_SHIFT 0 636 + 637 + #define SCALER_PPF_KERNEL_OFFSET_MASK VC4_MASK(13, 0) 638 + #define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 639 + #define SCALER_PPF_KERNEL_UNCACHED BIT(31) 592 640 593 641 #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) 594 642 #define SCALER_SRC_PITCH_SHIFT 0