From e2d7760df5a65ce6ab119e026725c809b8e8bfe3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 2 Jul 2016 10:03:05 -0700 Subject: [PATCH] vc4: Add a bitmap of branch targets in kernel validation. This isn't used yet, it's just a first step toward loop validation. During the main parsing of instructions, we need to know when we hit a new basic block so that we can reset validated state. --- .../drivers/vc4/kernel/vc4_validate_shaders.c | 114 +++++++++++++++++- src/gallium/drivers/vc4/vc4_qpu_defines.h | 3 + .../drivers/vc4/vc4_simulator_validate.h | 18 +++ 3 files changed, 133 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index 9c77f9e6a7e..0ea6d073a99 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -60,6 +60,13 @@ struct vc4_shader_validation_state { */ uint32_t live_min_clamp_offsets[32 + 32 + 4]; bool live_max_clamp_regs[32 + 32 + 4]; + + /* Bitfield of which IPs are used as branch targets. + * + * Used for validation that the uniform stream is updated at the right + * points and clearing the texturing/clamping state. + */ + unsigned long *branch_targets; }; static uint32_t @@ -419,13 +426,104 @@ check_instruction_reads(uint64_t inst, return true; } +/* Make sure that all branches are absolute and point within the shader, and + * note their targets for later. + */ +static bool +vc4_validate_branches(struct vc4_shader_validation_state *validation_state) +{ + uint32_t max_branch_target = 0; + bool found_shader_end = false; + int ip; + int shader_end_ip = 0; + int last_branch = -2; + + for (ip = 0; ip < validation_state->max_ip; ip++) { + uint64_t inst = validation_state->shader[ip]; + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + uint32_t after_delay_ip = ip + 4; + uint32_t branch_target_ip; + + if (sig == QPU_SIG_PROG_END) { + shader_end_ip = ip; + found_shader_end = true; + continue; + } + + if (sig != QPU_SIG_BRANCH) + continue; + + if (ip - last_branch < 4) { + DRM_ERROR("Branch at %d during delay slots\n", ip); + return false; + } + last_branch = ip; + + if (inst & QPU_BRANCH_REG) { + DRM_ERROR("branching from register relative " + "not supported\n"); + return false; + } + + if (!(inst & QPU_BRANCH_REL)) { + DRM_ERROR("relative branching required\n"); + return false; + } + + /* The actual branch target is the instruction after the delay + * slots, plus whatever byte offset is in the low 32 bits of + * the instruction. Make sure we're not branching beyond the + * end of the shader object. + */ + if (branch_imm % sizeof(inst) != 0) { + DRM_ERROR("branch target not aligned\n"); + return false; + }; + + branch_target_ip = after_delay_ip + (branch_imm >> 3); + if (branch_target_ip >= validation_state->max_ip) { + DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n", + ip, branch_target_ip, + validation_state->max_ip); + return false; + } + set_bit(branch_target_ip, validation_state->branch_targets); + + /* Make sure that the non-branching path is also not outside + * the shader. + */ + if (after_delay_ip >= validation_state->max_ip) { + DRM_ERROR("Branch at %d continues past shader end " + "(%d/%d)\n", + ip, after_delay_ip, validation_state->max_ip); + return false; + } + set_bit(after_delay_ip, validation_state->branch_targets); + max_branch_target = max(max_branch_target, after_delay_ip); + + /* There are two delay slots after program end is signaled + * that are still executed, then we're finished. + */ + if (found_shader_end && ip == shader_end_ip + 2) + break; + } + + if (max_branch_target > shader_end_ip) { + DRM_ERROR("Branch landed after QPU_SIG_PROG_END"); + return false; + } + + return true; +} + struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { bool found_shader_end = false; int shader_end_ip = 0; uint32_t ip; - struct vc4_validated_shader_info *validated_shader; + struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; int i; @@ -438,9 +536,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) validation_state.live_min_clamp_offsets[i] = ~0; + validation_state.branch_targets = + kcalloc(BITS_TO_LONGS(validation_state.max_ip), + sizeof(unsigned long), GFP_KERNEL); + if (!validation_state.branch_targets) + goto fail; + validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); if (!validated_shader) - return NULL; + goto fail; + + if (!vc4_validate_branches(&validation_state)) + goto fail; for (ip = 0; ip < validation_state.max_ip; ip++) { uint64_t inst = validation_state.shader[ip]; @@ -509,9 +616,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) (validated_shader->uniforms_size + 4 * validated_shader->num_texture_samples); + kfree(validation_state.branch_targets); + return validated_shader; fail: + kfree(validation_state.branch_targets); if (validated_shader) { kfree(validated_shader->texture_samples); kfree(validated_shader); diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h index 2ffa68663b1..c0b7c56c007 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_defines.h +++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h @@ -301,6 +301,9 @@ enum qpu_unpack { #define QPU_OP_ADD_SHIFT 24 #define QPU_OP_ADD_MASK QPU_MASK(28, 24) +#define QPU_LOAD_IMM_SHIFT 0 +#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0) + #define QPU_BRANCH_TARGET_SHIFT 0 #define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0) diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h index e1f8b5a305e..1352c9bafd2 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.h +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h @@ -47,6 +47,24 @@ struct vc4_exec_info; #define max(x, y) MAX2(x, y) #define min(x, y) MIN2(x, y) #define BUG_ON(condition) assert(!(condition)) +#define BIT(bit) (1u << bit) + +/* Unsigned long-based bitmap interface in the linux kernel */ +#define BITMAP_WORDBITS (sizeof(unsigned long) * 8) +#define BITS_TO_LONGS(bits) (roundup(bits, BITMAP_WORDBITS) / \ + sizeof(unsigned long)) +static inline bool +test_bit(unsigned int bit, unsigned long *addr) +{ + return addr[bit / BITMAP_WORDBITS] & (1ul << (bit % BITMAP_WORDBITS)); +} + +static inline bool +set_bit(unsigned int bit, unsigned long *addr) +{ + return addr[bit / BITMAP_WORDBITS] |= (1ul << (bit % BITMAP_WORDBITS)); +} + static inline int copy_from_user(void *dst, void *src, size_t size) -- 2.30.2