From 5debfeb86f524b06aa3b80f9855947a57b5f91e8 Mon Sep 17 00:00:00 2001 From: Jonas Pfeil Date: Thu, 10 Nov 2016 17:52:03 -0800 Subject: [PATCH] vc4: Add simulator kernel validation for multithreaded fragment shaders. This is Jonas Pfeil's code from the kernel, brought back to Mesa by anholt. --- src/gallium/drivers/vc4/kernel/vc4_drv.h | 2 + src/gallium/drivers/vc4/kernel/vc4_validate.c | 17 +++-- .../drivers/vc4/kernel/vc4_validate_shaders.c | 62 +++++++++++++++++++ 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 90f45397d83..8f5ed00d96f 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -150,6 +150,8 @@ struct vc4_validated_shader_info uint32_t num_uniform_addr_offsets; uint32_t *uniform_addr_offsets; + + bool is_threaded; }; /* vc4_validate.c */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index a9dce1fa379..bd193b993b3 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -780,11 +780,6 @@ validate_gl_shader_rec(struct drm_device *dev, exec->shader_rec_v += roundup(packet_size, 16); exec->shader_rec_size -= packet_size; - if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) { - DRM_ERROR("Multi-threaded fragment shaders not supported.\n"); - return -EINVAL; - } - for (i = 0; i < shader_reloc_count; i++) { if (src_handles[i] > exec->bo_count) { DRM_ERROR("Shader handle %d too big\n", src_handles[i]); @@ -801,6 +796,18 @@ validate_gl_shader_rec(struct drm_device *dev, return -EINVAL; } + if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) != + to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) { + DRM_ERROR("Thread mode of CL and FS do not match\n"); + return -EINVAL; + } + + if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded || + to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) { + DRM_ERROR("cs and vs cannot be threaded\n"); + return -EINVAL; + } + for (i = 0; i < shader_reloc_count; i++) { struct vc4_validated_shader_info *validated_shader; uint32_t o = shader_reloc_offsets[i]; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index 0ff3d01f3f2..d93f5239d7d 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -84,6 +84,14 @@ struct vc4_shader_validation_state { * basic blocks. */ bool needs_uniform_address_for_loop; + + /* Set when we find an instruction which violates the criterion for a + * threaded shader. These are: + * - only write the lower half of the register space + * - last thread switch signaled at the end + * So track the usage of the thread switches and the register usage. + */ + bool all_registers_used; }; static uint32_t @@ -119,6 +127,12 @@ raddr_add_a_to_live_reg_index(uint64_t inst) return ~0; } +static bool live_reg_is_upper_half(uint32_t lri) +{ + return (lri >=16 && lri < 32) || + (lri >=32 + 16 && lri < 32 + 32); +} + static bool is_tmu_submit(uint32_t waddr) { @@ -385,6 +399,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader, } else { validation_state->live_immediates[lri] = ~0; } + + if (live_reg_is_upper_half(lri)) + validation_state->all_registers_used = true; } switch (waddr) { @@ -593,6 +610,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader, } } + if ((raddr_a >= 16 && raddr_a < 32) || + (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) { + validation_state->all_registers_used = true; + } + return true; } @@ -748,6 +770,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { bool found_shader_end = false; int shader_end_ip = 0; + uint32_t last_thread_switch_ip = -3; uint32_t ip; struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; @@ -780,6 +803,16 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) if (!vc4_handle_branch_target(&validation_state)) goto fail; + if (ip == last_thread_switch_ip + 3) { + /* Reset r0-r3 live clamp data */ + int i; + for (i = 64; i < LIVE_REG_COUNT; i++) { + validation_state.live_min_clamp_offsets[i] = ~0; + validation_state.live_max_clamp_regs[i] = false; + validation_state.live_immediates[i] = ~0; + } + } + switch (sig) { case QPU_SIG_NONE: case QPU_SIG_WAIT_FOR_SCOREBOARD: @@ -789,6 +822,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) case QPU_SIG_LOAD_TMU1: case QPU_SIG_PROG_END: case QPU_SIG_SMALL_IMM: + case QPU_SIG_THREAD_SWITCH: + case QPU_SIG_LAST_THREAD_SWITCH: if (!check_instruction_writes(validated_shader, &validation_state)) { DRM_ERROR("Bad write at ip %d\n", ip); @@ -804,6 +839,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) shader_end_ip = ip; } + if (sig == QPU_SIG_THREAD_SWITCH || + sig == QPU_SIG_LAST_THREAD_SWITCH) { + validated_shader->is_threaded = true; + + if (ip < last_thread_switch_ip + 3) { + DRM_ERROR("Thread switch too soon after " + "last switch at ip %d\n", ip); + goto fail; + } + last_thread_switch_ip = ip; + } + break; case QPU_SIG_LOAD_IMM: @@ -818,6 +865,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) if (!check_branch(inst, validated_shader, &validation_state, ip)) goto fail; + + if (ip < last_thread_switch_ip + 3) { + DRM_ERROR("Branch in thread switch at ip %d", + ip); + goto fail; + } + break; default: DRM_ERROR("Unsupported QPU signal %d at " @@ -839,6 +893,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) goto fail; } + /* Might corrupt other thread */ + if (validated_shader->is_threaded && + validation_state.all_registers_used) { + DRM_ERROR("Shader uses threading, but uses the upper " + "half of the registers, too\n"); + goto fail; + } + /* If we did a backwards branch and we haven't emitted a uniforms * reset since then, we still need the uniforms stream to have the * uniforms address available so that the backwards branch can do its -- 2.30.2