Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-vc4-next-2016-07-15' of https://github.com/anholt/linux into drm-next

This pull request brings in vc4 shader validation for branching,
allowing GLSL shaders with non-unrolled loops.

* tag 'drm-vc4-next-2016-07-15' of https://github.com/anholt/linux:
drm/vc4: Fix a "the the" typo in a comment.
drm/vc4: Fix definition of QPU_R_MS_REV_FLAGS
drm/vc4: Add a getparam to signal support for branches.
drm/vc4: Add support for branching in shader validation.
drm/vc4: Add a bitmap of branch targets during shader validation.
drm/vc4: Move validation's current/max ip into the validation struct.
drm/vc4: Add a getparam ioctl for getting the V3D identity regs.

+496 -44
+45
drivers/gpu/drm/vc4/vc4_drv.c
··· 14 14 #include <linux/module.h> 15 15 #include <linux/of_platform.h> 16 16 #include <linux/platform_device.h> 17 + #include <linux/pm_runtime.h> 17 18 #include "drm_fb_cma_helper.h" 18 19 19 20 #include "uapi/drm/vc4_drm.h" ··· 42 41 } 43 42 44 43 return map; 44 + } 45 + 46 + static int vc4_get_param_ioctl(struct drm_device *dev, void *data, 47 + struct drm_file *file_priv) 48 + { 49 + struct vc4_dev *vc4 = to_vc4_dev(dev); 50 + struct drm_vc4_get_param *args = data; 51 + int ret; 52 + 53 + if (args->pad != 0) 54 + return -EINVAL; 55 + 56 + switch (args->param) { 57 + case DRM_VC4_PARAM_V3D_IDENT0: 58 + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); 59 + if (ret) 60 + return ret; 61 + args->value = V3D_READ(V3D_IDENT0); 62 + pm_runtime_put(&vc4->v3d->pdev->dev); 63 + break; 64 + case DRM_VC4_PARAM_V3D_IDENT1: 65 + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); 66 + if (ret) 67 + return ret; 68 + args->value = V3D_READ(V3D_IDENT1); 69 + pm_runtime_put(&vc4->v3d->pdev->dev); 70 + break; 71 + case DRM_VC4_PARAM_V3D_IDENT2: 72 + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); 73 + if (ret) 74 + return ret; 75 + args->value = V3D_READ(V3D_IDENT2); 76 + pm_runtime_put(&vc4->v3d->pdev->dev); 77 + break; 78 + case DRM_VC4_PARAM_SUPPORTS_BRANCHES: 79 + args->value = true; 80 + break; 81 + default: 82 + DRM_DEBUG("Unknown parameter %d\n", args->param); 83 + return -EINVAL; 84 + } 85 + 86 + return 0; 45 87 } 46 88 47 89 static void vc4_lastclose(struct drm_device *dev) ··· 118 74 DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), 119 75 DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, 120 76 DRM_ROOT_ONLY), 77 + DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), 121 78 }; 122 79 123 80 static struct drm_driver vc4_drm_driver = {
+3
drivers/gpu/drm/vc4/vc4_drv.h
··· 355 355 uint32_t uniforms_src_size; 356 356 uint32_t num_texture_samples; 357 357 struct vc4_texture_sample_info *texture_samples; 358 + 359 + uint32_t num_uniform_addr_offsets; 360 + uint32_t *uniform_addr_offsets; 358 361 }; 359 362 360 363 /**
+16 -1
drivers/gpu/drm/vc4/vc4_qpu_defines.h
··· 70 70 QPU_R_ELEM_QPU = 38, 71 71 QPU_R_NOP, 72 72 QPU_R_XY_PIXEL_COORD = 41, 73 - QPU_R_MS_REV_FLAGS = 41, 73 + QPU_R_MS_REV_FLAGS = 42, 74 74 QPU_R_VPM = 48, 75 75 QPU_R_VPM_LD_BUSY, 76 76 QPU_R_VPM_LD_WAIT, ··· 230 230 #define QPU_COND_MUL_SHIFT 46 231 231 #define QPU_COND_MUL_MASK QPU_MASK(48, 46) 232 232 233 + #define QPU_BRANCH_COND_SHIFT 52 234 + #define QPU_BRANCH_COND_MASK QPU_MASK(55, 52) 235 + 236 + #define QPU_BRANCH_REL ((uint64_t)1 << 51) 237 + #define QPU_BRANCH_REG ((uint64_t)1 << 50) 238 + 239 + #define QPU_BRANCH_RADDR_A_SHIFT 45 240 + #define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45) 241 + 233 242 #define QPU_SF ((uint64_t)1 << 45) 234 243 235 244 #define QPU_WADDR_ADD_SHIFT 38 ··· 269 260 270 261 #define QPU_OP_ADD_SHIFT 24 271 262 #define QPU_OP_ADD_MASK QPU_MASK(28, 24) 263 + 264 + #define QPU_LOAD_IMM_SHIFT 0 265 + #define QPU_LOAD_IMM_MASK QPU_MASK(31, 0) 266 + 267 + #define QPU_BRANCH_TARGET_SHIFT 0 268 + #define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0) 272 269 273 270 #endif /* VC4_QPU_DEFINES_H */
+12 -1
drivers/gpu/drm/vc4/vc4_validate.c
··· 802 802 uint32_t src_offset = *(uint32_t *)(pkt_u + o); 803 803 uint32_t *texture_handles_u; 804 804 void *uniform_data_u; 805 - uint32_t tex; 805 + uint32_t tex, uni; 806 806 807 807 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; 808 808 ··· 838 838 texture_handles_u[tex])) { 839 839 return -EINVAL; 840 840 } 841 + } 842 + 843 + /* Fill in the uniform slots that need this shader's 844 + * start-of-uniforms address (used for resetting the uniform 845 + * stream in the presence of control flow). 846 + */ 847 + for (uni = 0; 848 + uni < validated_shader->num_uniform_addr_offsets; 849 + uni++) { 850 + uint32_t o = validated_shader->uniform_addr_offsets[uni]; 851 + ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p; 841 852 } 842 853 843 854 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+407 -42
drivers/gpu/drm/vc4/vc4_validate_shaders.c
··· 39 39 #include "vc4_drv.h" 40 40 #include "vc4_qpu_defines.h" 41 41 42 + #define LIVE_REG_COUNT (32 + 32 + 4) 43 + 42 44 struct vc4_shader_validation_state { 45 + /* Current IP being validated. */ 46 + uint32_t ip; 47 + 48 + /* IP at the end of the BO, do not read shader[max_ip] */ 49 + uint32_t max_ip; 50 + 51 + uint64_t *shader; 52 + 43 53 struct vc4_texture_sample_info tmu_setup[2]; 44 54 int tmu_write_count[2]; 45 55 ··· 59 49 * 60 50 * This is used for the validation of direct address memory reads. 61 51 */ 62 - uint32_t live_min_clamp_offsets[32 + 32 + 4]; 63 - bool live_max_clamp_regs[32 + 32 + 4]; 52 + uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; 53 + bool live_max_clamp_regs[LIVE_REG_COUNT]; 54 + uint32_t live_immediates[LIVE_REG_COUNT]; 55 + 56 + /* Bitfield of which IPs are used as branch targets. 57 + * 58 + * Used for validation that the uniform stream is updated at the right 59 + * points and clearing the texturing/clamping state. 60 + */ 61 + unsigned long *branch_targets; 62 + 63 + /* Set when entering a basic block, and cleared when the uniform 64 + * address update is found. This is used to make sure that we don't 65 + * read uniforms when the address is undefined. 66 + */ 67 + bool needs_uniform_address_update; 68 + 69 + /* Set when we find a backwards branch. If the branch is backwards, 70 + * the target is probably doing an address reset to read uniforms, 71 + * and so we need to be sure that a uniforms address is present in the 72 + * stream, even if the shader didn't need to read uniforms in later 73 + * basic blocks. 
74 + */ 75 + bool needs_uniform_address_for_loop; 64 76 }; 65 77 66 78 static uint32_t ··· 161 129 } 162 130 163 131 static bool 164 - check_tmu_write(uint64_t inst, 165 - struct vc4_validated_shader_info *validated_shader, 132 + check_tmu_write(struct vc4_validated_shader_info *validated_shader, 166 133 struct vc4_shader_validation_state *validation_state, 167 134 bool is_mul) 168 135 { 136 + uint64_t inst = validation_state->shader[validation_state->ip]; 169 137 uint32_t waddr = (is_mul ? 170 138 QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 171 139 QPU_GET_FIELD(inst, QPU_WADDR_ADD)); ··· 194 162 return false; 195 163 } 196 164 197 - /* We assert that the the clamped address is the first 165 + /* We assert that the clamped address is the first 198 166 * argument, and the UBO base address is the second argument. 199 167 * This is arbitrary, but simpler than supporting flipping the 200 168 * two either way. ··· 244 212 /* Since direct uses a RADDR uniform reference, it will get counted in 245 213 * check_instruction_reads() 246 214 */ 247 - if (!is_direct) 215 + if (!is_direct) { 216 + if (validation_state->needs_uniform_address_update) { 217 + DRM_ERROR("Texturing with undefined uniform address\n"); 218 + return false; 219 + } 220 + 248 221 validated_shader->uniforms_size += 4; 222 + } 249 223 250 224 if (submit) { 251 225 if (!record_texture_sample(validated_shader, ··· 265 227 return true; 266 228 } 267 229 230 + static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) 231 + { 232 + uint32_t o = validated_shader->num_uniform_addr_offsets; 233 + uint32_t num_uniforms = validated_shader->uniforms_size / 4; 234 + 235 + validated_shader->uniform_addr_offsets = 236 + krealloc(validated_shader->uniform_addr_offsets, 237 + (o + 1) * 238 + sizeof(*validated_shader->uniform_addr_offsets), 239 + GFP_KERNEL); 240 + if (!validated_shader->uniform_addr_offsets) 241 + return false; 242 + 243 + validated_shader->uniform_addr_offsets[o] = 
num_uniforms; 244 + validated_shader->num_uniform_addr_offsets++; 245 + 246 + return true; 247 + } 248 + 268 249 static bool 269 - check_reg_write(uint64_t inst, 270 - struct vc4_validated_shader_info *validated_shader, 250 + validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, 251 + struct vc4_shader_validation_state *validation_state, 252 + bool is_mul) 253 + { 254 + uint64_t inst = validation_state->shader[validation_state->ip]; 255 + u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 256 + u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 257 + u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 258 + u32 add_lri = raddr_add_a_to_live_reg_index(inst); 259 + /* We want our reset to be pointing at whatever uniform follows the 260 + * uniforms base address. 261 + */ 262 + u32 expected_offset = validated_shader->uniforms_size + 4; 263 + 264 + /* We only support absolute uniform address changes, and we 265 + * require that they be in the current basic block before any 266 + * of its uniform reads. 267 + * 268 + * One could potentially emit more efficient QPU code, by 269 + * noticing that (say) an if statement does uniform control 270 + * flow for all threads and that the if reads the same number 271 + * of uniforms on each side. However, this scheme is easy to 272 + * validate so it's all we allow for now. 
273 + */ 274 + 275 + if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) { 276 + DRM_ERROR("uniforms address change must be " 277 + "normal math\n"); 278 + return false; 279 + } 280 + 281 + if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 282 + DRM_ERROR("Uniform address reset must be an ADD.\n"); 283 + return false; 284 + } 285 + 286 + if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) { 287 + DRM_ERROR("Uniform address reset must be unconditional.\n"); 288 + return false; 289 + } 290 + 291 + if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP && 292 + !(inst & QPU_PM)) { 293 + DRM_ERROR("No packing allowed on uniforms reset\n"); 294 + return false; 295 + } 296 + 297 + if (add_lri == -1) { 298 + DRM_ERROR("First argument of uniform address write must be " 299 + "an immediate value.\n"); 300 + return false; 301 + } 302 + 303 + if (validation_state->live_immediates[add_lri] != expected_offset) { 304 + DRM_ERROR("Resetting uniforms with offset %db instead of %db\n", 305 + validation_state->live_immediates[add_lri], 306 + expected_offset); 307 + return false; 308 + } 309 + 310 + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 311 + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 312 + DRM_ERROR("Second argument of uniform address write must be " 313 + "a uniform.\n"); 314 + return false; 315 + } 316 + 317 + validation_state->needs_uniform_address_update = false; 318 + validation_state->needs_uniform_address_for_loop = false; 319 + return require_uniform_address_uniform(validated_shader); 320 + } 321 + 322 + static bool 323 + check_reg_write(struct vc4_validated_shader_info *validated_shader, 271 324 struct vc4_shader_validation_state *validation_state, 272 325 bool is_mul) 273 326 { 327 + uint64_t inst = validation_state->shader[validation_state->ip]; 274 328 uint32_t waddr = (is_mul ? 
275 329 QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 276 330 QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 331 + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 332 + bool ws = inst & QPU_WS; 333 + bool is_b = is_mul ^ ws; 334 + u32 lri = waddr_to_live_reg_index(waddr, is_b); 335 + 336 + if (lri != -1) { 337 + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 338 + uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 339 + 340 + if (sig == QPU_SIG_LOAD_IMM && 341 + QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP && 342 + ((is_mul && cond_mul == QPU_COND_ALWAYS) || 343 + (!is_mul && cond_add == QPU_COND_ALWAYS))) { 344 + validation_state->live_immediates[lri] = 345 + QPU_GET_FIELD(inst, QPU_LOAD_IMM); 346 + } else { 347 + validation_state->live_immediates[lri] = ~0; 348 + } 349 + } 277 350 278 351 switch (waddr) { 279 352 case QPU_W_UNIFORMS_ADDRESS: 280 - /* XXX: We'll probably need to support this for reladdr, but 281 - * it's definitely a security-related one. 282 - */ 283 - DRM_ERROR("uniforms address load unsupported\n"); 284 - return false; 353 + if (is_b) { 354 + DRM_ERROR("relative uniforms address change " 355 + "unsupported\n"); 356 + return false; 357 + } 358 + 359 + return validate_uniform_address_write(validated_shader, 360 + validation_state, 361 + is_mul); 285 362 286 363 case QPU_W_TLB_COLOR_MS: 287 364 case QPU_W_TLB_COLOR_ALL: ··· 414 261 case QPU_W_TMU1_T: 415 262 case QPU_W_TMU1_R: 416 263 case QPU_W_TMU1_B: 417 - return check_tmu_write(inst, validated_shader, validation_state, 264 + return check_tmu_write(validated_shader, validation_state, 418 265 is_mul); 419 266 420 267 case QPU_W_HOST_INT: ··· 447 294 } 448 295 449 296 static void 450 - track_live_clamps(uint64_t inst, 451 - struct vc4_validated_shader_info *validated_shader, 297 + track_live_clamps(struct vc4_validated_shader_info *validated_shader, 452 298 struct vc4_shader_validation_state *validation_state) 453 299 { 300 + uint64_t inst = validation_state->shader[validation_state->ip]; 454 301 
uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 455 302 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 456 303 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); ··· 522 369 } 523 370 524 371 static bool 525 - check_instruction_writes(uint64_t inst, 526 - struct vc4_validated_shader_info *validated_shader, 372 + check_instruction_writes(struct vc4_validated_shader_info *validated_shader, 527 373 struct vc4_shader_validation_state *validation_state) 528 374 { 375 + uint64_t inst = validation_state->shader[validation_state->ip]; 529 376 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 530 377 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 531 378 bool ok; ··· 535 382 return false; 536 383 } 537 384 538 - ok = (check_reg_write(inst, validated_shader, validation_state, 539 - false) && 540 - check_reg_write(inst, validated_shader, validation_state, 541 - true)); 385 + ok = (check_reg_write(validated_shader, validation_state, false) && 386 + check_reg_write(validated_shader, validation_state, true)); 542 387 543 - track_live_clamps(inst, validated_shader, validation_state); 388 + track_live_clamps(validated_shader, validation_state); 544 389 545 390 return ok; 546 391 } 547 392 548 393 static bool 549 - check_instruction_reads(uint64_t inst, 550 - struct vc4_validated_shader_info *validated_shader) 394 + check_branch(uint64_t inst, 395 + struct vc4_validated_shader_info *validated_shader, 396 + struct vc4_shader_validation_state *validation_state, 397 + int ip) 551 398 { 399 + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 400 + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 401 + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 402 + 403 + if ((int)branch_imm < 0) 404 + validation_state->needs_uniform_address_for_loop = true; 405 + 406 + /* We don't want to have to worry about validation of this, and 407 + * there's no need for it. 
408 + */ 409 + if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) { 410 + DRM_ERROR("branch instruction at %d wrote a register.\n", 411 + validation_state->ip); 412 + return false; 413 + } 414 + 415 + return true; 416 + } 417 + 418 + static bool 419 + check_instruction_reads(struct vc4_validated_shader_info *validated_shader, 420 + struct vc4_shader_validation_state *validation_state) 421 + { 422 + uint64_t inst = validation_state->shader[validation_state->ip]; 552 423 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 553 424 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 554 425 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); ··· 584 407 * already be OOM. 585 408 */ 586 409 validated_shader->uniforms_size += 4; 410 + 411 + if (validation_state->needs_uniform_address_update) { 412 + DRM_ERROR("Uniform read with undefined uniform " 413 + "address\n"); 414 + return false; 415 + } 587 416 } 417 + 418 + return true; 419 + } 420 + 421 + /* Make sure that all branches are absolute and point within the shader, and 422 + * note their targets for later. 
423 + */ 424 + static bool 425 + vc4_validate_branches(struct vc4_shader_validation_state *validation_state) 426 + { 427 + uint32_t max_branch_target = 0; 428 + bool found_shader_end = false; 429 + int ip; 430 + int shader_end_ip = 0; 431 + int last_branch = -2; 432 + 433 + for (ip = 0; ip < validation_state->max_ip; ip++) { 434 + uint64_t inst = validation_state->shader[ip]; 435 + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 436 + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 437 + uint32_t after_delay_ip = ip + 4; 438 + uint32_t branch_target_ip; 439 + 440 + if (sig == QPU_SIG_PROG_END) { 441 + shader_end_ip = ip; 442 + found_shader_end = true; 443 + continue; 444 + } 445 + 446 + if (sig != QPU_SIG_BRANCH) 447 + continue; 448 + 449 + if (ip - last_branch < 4) { 450 + DRM_ERROR("Branch at %d during delay slots\n", ip); 451 + return false; 452 + } 453 + last_branch = ip; 454 + 455 + if (inst & QPU_BRANCH_REG) { 456 + DRM_ERROR("branching from register relative " 457 + "not supported\n"); 458 + return false; 459 + } 460 + 461 + if (!(inst & QPU_BRANCH_REL)) { 462 + DRM_ERROR("relative branching required\n"); 463 + return false; 464 + } 465 + 466 + /* The actual branch target is the instruction after the delay 467 + * slots, plus whatever byte offset is in the low 32 bits of 468 + * the instruction. Make sure we're not branching beyond the 469 + * end of the shader object. 470 + */ 471 + if (branch_imm % sizeof(inst) != 0) { 472 + DRM_ERROR("branch target not aligned\n"); 473 + return false; 474 + } 475 + 476 + branch_target_ip = after_delay_ip + (branch_imm >> 3); 477 + if (branch_target_ip >= validation_state->max_ip) { 478 + DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n", 479 + ip, branch_target_ip, 480 + validation_state->max_ip); 481 + return false; 482 + } 483 + set_bit(branch_target_ip, validation_state->branch_targets); 484 + 485 + /* Make sure that the non-branching path is also not outside 486 + * the shader. 
487 + */ 488 + if (after_delay_ip >= validation_state->max_ip) { 489 + DRM_ERROR("Branch at %d continues past shader end " 490 + "(%d/%d)\n", 491 + ip, after_delay_ip, validation_state->max_ip); 492 + return false; 493 + } 494 + set_bit(after_delay_ip, validation_state->branch_targets); 495 + max_branch_target = max(max_branch_target, after_delay_ip); 496 + 497 + /* There are two delay slots after program end is signaled 498 + * that are still executed, then we're finished. 499 + */ 500 + if (found_shader_end && ip == shader_end_ip + 2) 501 + break; 502 + } 503 + 504 + if (max_branch_target > shader_end_ip) { 505 + DRM_ERROR("Branch landed after QPU_SIG_PROG_END"); 506 + return false; 507 + } 508 + 509 + return true; 510 + } 511 + 512 + /* Resets any known state for the shader, used when we may be branched to from 513 + * multiple locations in the program (or at shader start). 514 + */ 515 + static void 516 + reset_validation_state(struct vc4_shader_validation_state *validation_state) 517 + { 518 + int i; 519 + 520 + for (i = 0; i < 8; i++) 521 + validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0; 522 + 523 + for (i = 0; i < LIVE_REG_COUNT; i++) { 524 + validation_state->live_min_clamp_offsets[i] = ~0; 525 + validation_state->live_max_clamp_regs[i] = false; 526 + validation_state->live_immediates[i] = ~0; 527 + } 528 + } 529 + 530 + static bool 531 + texturing_in_progress(struct vc4_shader_validation_state *validation_state) 532 + { 533 + return (validation_state->tmu_write_count[0] != 0 || 534 + validation_state->tmu_write_count[1] != 0); 535 + } 536 + 537 + static bool 538 + vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) 539 + { 540 + uint32_t ip = validation_state->ip; 541 + 542 + if (!test_bit(ip, validation_state->branch_targets)) 543 + return true; 544 + 545 + if (texturing_in_progress(validation_state)) { 546 + DRM_ERROR("Branch target landed during TMU setup\n"); 547 + return false; 548 + } 549 + 550 + /* Reset our live 
values tracking, since this instruction may have 551 + * multiple predecessors. 552 + * 553 + * One could potentially do analysis to determine that, for 554 + * example, all predecessors have a live max clamp in the same 555 + * register, but we don't bother with that. 556 + */ 557 + reset_validation_state(validation_state); 558 + 559 + /* Since we've entered a basic block from potentially multiple 560 + * predecessors, we need the uniforms address to be updated before any 561 + * uniforms are read. We require that after any branch point, the next 562 + * uniform to be loaded is a uniform address offset. That uniform's 563 + * offset will be marked by the uniform address register write 564 + * validation, or a one-off the end-of-program check. 565 + */ 566 + validation_state->needs_uniform_address_update = true; 588 567 589 568 return true; 590 569 } ··· 750 417 { 751 418 bool found_shader_end = false; 752 419 int shader_end_ip = 0; 753 - uint32_t ip, max_ip; 754 - uint64_t *shader; 755 - struct vc4_validated_shader_info *validated_shader; 756 420 uint32_t ip; 421 + struct vc4_validated_shader_info *validated_shader = NULL; 756 422 struct vc4_shader_validation_state validation_state; 757 - int i; 758 423 759 424 memset(&validation_state, 0, sizeof(validation_state)); 425 + validation_state.shader = shader_obj->vaddr; 426 + validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); 760 427 761 - for (i = 0; i < 8; i++) 762 - validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; 763 - for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) 764 - validation_state.live_min_clamp_offsets[i] = ~0; 428 + reset_validation_state(&validation_state); 765 429 766 - shader = shader_obj->vaddr; 767 - max_ip = shader_obj->base.size / sizeof(uint64_t); 430 + validation_state.branch_targets = 431 + kcalloc(BITS_TO_LONGS(validation_state.max_ip), 432 + sizeof(unsigned long), GFP_KERNEL); 433 + if (!validation_state.branch_targets) 434 + goto fail; 768 435 
769 436 validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); 770 437 if (!validated_shader) 771 - return NULL; 438 + goto fail; 772 439 773 - for (ip = 0; ip < max_ip; ip++) { 774 - uint64_t inst = shader[ip]; 440 + if (!vc4_validate_branches(&validation_state)) 441 + goto fail; 442 + 443 + for (ip = 0; ip < validation_state.max_ip; ip++) { 444 + uint64_t inst = validation_state.shader[ip]; 775 445 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 446 + 447 + validation_state.ip = ip; 448 + 449 + if (!vc4_handle_branch_target(&validation_state)) 450 + goto fail; 776 451 777 452 switch (sig) { 778 453 case QPU_SIG_NONE: ··· 791 450 case QPU_SIG_LOAD_TMU1: 792 451 case QPU_SIG_PROG_END: 793 452 case QPU_SIG_SMALL_IMM: 794 - if (!check_instruction_writes(inst, validated_shader, 453 + if (!check_instruction_writes(validated_shader, 795 454 &validation_state)) { 796 455 DRM_ERROR("Bad write at ip %d\n", ip); 797 456 goto fail; 798 457 } 799 458 800 - if (!check_instruction_reads(inst, validated_shader)) 459 + if (!check_instruction_reads(validated_shader, 460 + &validation_state)) 801 461 goto fail; 802 462 803 463 if (sig == QPU_SIG_PROG_END) { ··· 809 467 break; 810 468 811 469 case QPU_SIG_LOAD_IMM: 812 - if (!check_instruction_writes(inst, validated_shader, 470 + if (!check_instruction_writes(validated_shader, 813 471 &validation_state)) { 814 472 DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip); 815 473 goto fail; 816 474 } 817 475 break; 818 476 477 + case QPU_SIG_BRANCH: 478 + if (!check_branch(inst, validated_shader, 479 + &validation_state, ip)) 480 + goto fail; 481 + break; 819 482 default: 820 483 DRM_ERROR("Unsupported QPU signal %d at " 821 484 "instruction %d\n", sig, ip); ··· 834 487 break; 835 488 } 836 489 837 - if (ip == max_ip) { 490 + if (ip == validation_state.max_ip) { 838 491 DRM_ERROR("shader failed to terminate before " 839 492 "shader BO end at %zd\n", 840 493 shader_obj->base.size); 841 494 goto fail; 495 + } 496 + 497 + /* If we did 
a backwards branch and we haven't emitted a uniforms 498 + * reset since then, we still need the uniforms stream to have the 499 + * uniforms address available so that the backwards branch can do its 500 + * uniforms reset. 501 + * 502 + * We could potentially prove that the backwards branch doesn't 503 + * contain any uses of uniforms until program exit, but that doesn't 504 + * seem to be worth the trouble. 505 + */ 506 + if (validation_state.needs_uniform_address_for_loop) { 507 + if (!require_uniform_address_uniform(validated_shader)) 508 + goto fail; 509 + validated_shader->uniforms_size += 4; 842 510 } 843 511 844 512 /* Again, no chance of integer overflow here because the worst case ··· 864 502 (validated_shader->uniforms_size + 865 503 4 * validated_shader->num_texture_samples); 866 504 505 + kfree(validation_state.branch_targets); 506 + 867 507 return validated_shader; 868 508 869 509 fail: 510 + kfree(validation_state.branch_targets); 870 511 if (validated_shader) { 871 512 kfree(validated_shader->texture_samples); 872 513 kfree(validated_shader);
+13
include/uapi/drm/vc4_drm.h
··· 37 37 #define DRM_VC4_MMAP_BO 0x04 38 38 #define DRM_VC4_CREATE_SHADER_BO 0x05 39 39 #define DRM_VC4_GET_HANG_STATE 0x06 40 + #define DRM_VC4_GET_PARAM 0x07 40 41 41 42 #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) 42 43 #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) ··· 46 45 #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) 47 46 #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) 48 47 #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) 48 + #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) 49 49 50 50 struct drm_vc4_submit_rcl_surface { 51 51 __u32 hindex; /* Handle index, or ~0 if not present. */ ··· 280 278 281 279 /* Pad that we may save more registers into in the future. */ 282 280 __u32 pad[16]; 281 + }; 282 + 283 + #define DRM_VC4_PARAM_V3D_IDENT0 0 284 + #define DRM_VC4_PARAM_V3D_IDENT1 1 285 + #define DRM_VC4_PARAM_V3D_IDENT2 2 286 + #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 287 + 288 + struct drm_vc4_get_param { 289 + __u32 param; 290 + __u32 pad; 291 + __u64 value; 283 292 }; 284 293 285 294 #if defined(__cplusplus)