Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order.

This is useful to allow GL to provide defined results for overlapping
glBlitFramebuffer calls, which X11 in turn uses to accelerate uncomposited
window movement without first blitting to a temporary buffer. x11perf
-copywinwin100 goes from 1850/sec to 4850/sec.

v2: Default to the same behavior as before when the flags aren't
passed (suggested by Boris).

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>

+32 -6
+1
drivers/gpu/drm/vc4/vc4_drv.c
··· 99 99 case DRM_VC4_PARAM_SUPPORTS_BRANCHES: 100 100 case DRM_VC4_PARAM_SUPPORTS_ETC1: 101 101 case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: 102 + case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: 102 103 args->value = true; 103 104 break; 104 105 default:
+4 -1
drivers/gpu/drm/vc4/vc4_gem.c
··· 1007 1007 struct ww_acquire_ctx acquire_ctx; 1008 1008 int ret = 0; 1009 1009 1010 - if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { 1010 + if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | 1011 + VC4_SUBMIT_CL_FIXED_RCL_ORDER | 1012 + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | 1013 + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { 1011 1014 DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); 1012 1015 return -EINVAL; 1013 1016 }
+16 -5
drivers/gpu/drm/vc4/vc4_render_cl.c
··· 261 261 uint8_t max_y_tile = args->max_y_tile; 262 262 uint8_t xtiles = max_x_tile - min_x_tile + 1; 263 263 uint8_t ytiles = max_y_tile - min_y_tile + 1; 264 - uint8_t x, y; 264 + uint8_t xi, yi; 265 265 uint32_t size, loop_body_size; 266 + bool positive_x = true; 267 + bool positive_y = true; 268 + 269 + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) { 270 + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)) 271 + positive_x = false; 272 + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) 273 + positive_y = false; 274 + } 266 275 267 276 size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; 268 277 loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; ··· 363 354 rcl_u16(setup, args->height); 364 355 rcl_u16(setup, args->color_write.bits); 365 356 366 - for (y = min_y_tile; y <= max_y_tile; y++) { 367 - for (x = min_x_tile; x <= max_x_tile; x++) { 368 - bool first = (x == min_x_tile && y == min_y_tile); 369 - bool last = (x == max_x_tile && y == max_y_tile); 357 + for (yi = 0; yi < ytiles; yi++) { 358 + int y = positive_y ? min_y_tile + yi : max_y_tile - yi; 359 + for (xi = 0; xi < xtiles; xi++) { 360 + int x = positive_x ? min_x_tile + xi : max_x_tile - xi; 361 + bool first = (xi == 0 && yi == 0); 362 + bool last = (xi == xtiles - 1 && yi == ytiles - 1); 370 363 371 364 emit_tile(exec, setup, x, y, first, last); 372 365 }
+11
include/uapi/drm/vc4_drm.h
··· 155 155 __u32 pad:24; 156 156 157 157 #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) 158 + /* By default, the kernel gets to choose the order that the tiles are 159 + * rendered in. If this is set, then the tiles will be rendered in a 160 + * raster order, with the right-to-left vs left-to-right and 161 + * top-to-bottom vs bottom-to-top dictated by 162 + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping 163 + * blits to be implemented using the 3D engine. 164 + */ 165 + #define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) 166 + #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) 167 + #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) 158 168 __u32 flags; 159 169 160 170 /* Returned value of the seqno of this render job (for the ··· 304 294 #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 305 295 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 306 296 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 297 + #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 307 298 308 299 struct drm_vc4_get_param { 309 300 __u32 param;