vc4: Add a bitmap of branch targets in kernel validation.
author     Eric Anholt <eric@anholt.net>
           Sat, 2 Jul 2016 17:03:05 +0000 (10:03 -0700)
committer  Eric Anholt <eric@anholt.net>
           Wed, 13 Jul 2016 00:42:38 +0000 (17:42 -0700)
This isn't used yet; it's just a first step toward loop validation.
During the main parsing of instructions, we need to know when we hit a new
basic block so that we can reset the validation state.
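
As a sketch of where this is headed (not part of this commit, and the eventual
patch may differ): the main parsing loop would consult the bitmap at each
instruction and reset the per-block tracking whenever a new basic block
begins, along the lines of:

        for (ip = 0; ip < validation_state.max_ip; ip++) {
                uint64_t inst = validation_state.shader[ip];

                if (test_bit(ip, validation_state.branch_targets)) {
                        /* A new basic block starts here, so conservatively
                         * drop the clamp tracking carried over from the
                         * previous block.
                         */
                        memset(validation_state.live_max_clamp_regs, 0,
                               sizeof(validation_state.live_max_clamp_regs));
                        for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
                                validation_state.live_min_clamp_offsets[i] = ~0;
                }

                /* ... existing per-instruction validation ... */
        }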

src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
src/gallium/drivers/vc4/vc4_qpu_defines.h
src/gallium/drivers/vc4/vc4_simulator_validate.h

index 9c77f9e6a7e98588397a97df47b7c220778e9979..0ea6d073a996ec99183ead2fec3c444156b1d5fa 100644 (file)
@@ -60,6 +60,13 @@ struct vc4_shader_validation_state {
         */
        uint32_t live_min_clamp_offsets[32 + 32 + 4];
        bool live_max_clamp_regs[32 + 32 + 4];
+
+       /* Bitfield of which IPs are used as branch targets.
+        *
+        * Used for validating that the uniform stream is updated at the right
+        * points and for clearing the texturing/clamping state.
+        */
+       unsigned long *branch_targets;
 };
 
 static uint32_t
@@ -419,13 +426,104 @@ check_instruction_reads(uint64_t inst,
        return true;
 }
 
+/* Make sure that all branches use pc-relative immediates that point within
+ * the shader, and note their targets for later.
+ */
+static bool
+vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
+{
+       uint32_t max_branch_target = 0;
+       bool found_shader_end = false;
+       int ip;
+       int shader_end_ip = 0;
+       int last_branch = -2;
+
+       for (ip = 0; ip < validation_state->max_ip; ip++) {
+               uint64_t inst = validation_state->shader[ip];
+               int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+               uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+               uint32_t after_delay_ip = ip + 4;
+               uint32_t branch_target_ip;
+
+               if (sig == QPU_SIG_PROG_END) {
+                       shader_end_ip = ip;
+                       found_shader_end = true;
+                       continue;
+               }
+
+               if (sig != QPU_SIG_BRANCH)
+                       continue;
+
+               if (ip - last_branch < 4) {
+                       DRM_ERROR("Branch at %d during delay slots\n", ip);
+                       return false;
+               }
+               last_branch = ip;
+
+               if (inst & QPU_BRANCH_REG) {
+                       DRM_ERROR("branching from a register is not "
+                                 "supported\n");
+                       return false;
+               }
+
+               if (!(inst & QPU_BRANCH_REL)) {
+                       DRM_ERROR("relative branching required\n");
+                       return false;
+               }
+
+               /* The actual branch target is the instruction after the delay
+                * slots, plus whatever byte offset is in the low 32 bits of
+                * the instruction.  Make sure we're not branching beyond the
+                * end of the shader object.
+                */
+               if (branch_imm % sizeof(inst) != 0) {
+                       DRM_ERROR("branch target not aligned\n");
+                       return false;
+               }
+
+               branch_target_ip = after_delay_ip + (branch_imm >> 3);
+               if (branch_target_ip >= validation_state->max_ip) {
+                       DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
+                                 ip, branch_target_ip,
+                                 validation_state->max_ip);
+                       return false;
+               }
+               set_bit(branch_target_ip, validation_state->branch_targets);
+
+               /* Make sure that the non-branching path is also not outside
+                * the shader.
+                */
+               if (after_delay_ip >= validation_state->max_ip) {
+                       DRM_ERROR("Branch at %d continues past shader end "
+                                 "(%d/%d)\n",
+                                 ip, after_delay_ip, validation_state->max_ip);
+                       return false;
+               }
+               set_bit(after_delay_ip, validation_state->branch_targets);
+               max_branch_target = max(max_branch_target, after_delay_ip);
+
+               /* There are two delay slots after program end is signaled
+                * that are still executed, then we're finished.
+                */
+               if (found_shader_end && ip == shader_end_ip + 2)
+                       break;
+       }
+
+       if (max_branch_target > shader_end_ip) {
+               DRM_ERROR("Branch landed after QPU_SIG_PROG_END\n");
+               return false;
+       }
+
+       return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
        bool found_shader_end = false;
        int shader_end_ip = 0;
        uint32_t ip;
-       struct vc4_validated_shader_info *validated_shader;
+       struct vc4_validated_shader_info *validated_shader = NULL;
        struct vc4_shader_validation_state validation_state;
        int i;
 
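
To make the branch-target arithmetic above concrete, here is a worked example
with made-up numbers (illustration only, not taken from the patch or a real
shader). Each QPU instruction is 64 bits, so the byte offset in the branch
immediate converts to an instruction index with a shift right by 3, and the
three delay slots mean the branch takes effect four instructions after the
branch itself:

        /* Hypothetical branch encoded at ip = 10 with branch_imm = 16 bytes. */
        after_delay_ip   = 10 + 4;                      /* execution continues at ip 14 */
        branch_target_ip = after_delay_ip + (16 >> 3);  /* taken target is ip 16 */

Both ip 14 (the fall-through path) and ip 16 (the taken target) are recorded
in branch_targets, since either one can begin a new basic block.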
@@ -438,9 +536,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
        for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
                validation_state.live_min_clamp_offsets[i] = ~0;
 
+       validation_state.branch_targets =
+               kcalloc(BITS_TO_LONGS(validation_state.max_ip),
+                       sizeof(unsigned long), GFP_KERNEL);
+       if (!validation_state.branch_targets)
+               goto fail;
+
        validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
        if (!validated_shader)
-               return NULL;
+               goto fail;
+
+       if (!vc4_validate_branches(&validation_state))
+               goto fail;
 
        for (ip = 0; ip < validation_state.max_ip; ip++) {
                uint64_t inst = validation_state.shader[ip];
@@ -509,9 +616,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                (validated_shader->uniforms_size +
                 4 * validated_shader->num_texture_samples);
 
+       kfree(validation_state.branch_targets);
+
        return validated_shader;
 
 fail:
+       kfree(validation_state.branch_targets);
        if (validated_shader) {
                kfree(validated_shader->texture_samples);
                kfree(validated_shader);
index 2ffa68663b1cc601fc3007ad29ce6c6817f07fbe..c0b7c56c007374150b5d27fcb7af96437360ace2 100644 (file)
@@ -301,6 +301,9 @@ enum qpu_unpack {
 #define QPU_OP_ADD_SHIFT                24
 #define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
 
+#define QPU_LOAD_IMM_SHIFT              0
+#define QPU_LOAD_IMM_MASK               QPU_MASK(31, 0)
+
 #define QPU_BRANCH_TARGET_SHIFT         0
 #define QPU_BRANCH_TARGET_MASK          QPU_MASK(31, 0)
 
index e1f8b5a305e7800c97011749336483ae27a1f88c..1352c9bafd242c6d450b51d6a82a9af839a3e1b5 100644 (file)
@@ -47,6 +47,24 @@ struct vc4_exec_info;
 #define max(x, y) MAX2(x, y)
 #define min(x, y) MIN2(x, y)
 #define BUG_ON(condition) assert(!(condition))
+#define BIT(bit) (1u << (bit))
+
+/* Unsigned long-based bitmap interface matching the Linux kernel's. */
+#define BITMAP_WORDBITS (sizeof(unsigned long) * 8)
+#define BITS_TO_LONGS(bits) (roundup(bits, BITMAP_WORDBITS) / \
+                             BITMAP_WORDBITS)
+static inline bool
+test_bit(unsigned int bit, unsigned long *addr)
+{
+        return addr[bit / BITMAP_WORDBITS] & (1ul << (bit % BITMAP_WORDBITS));
+}
+
+static inline void
+set_bit(unsigned int bit, unsigned long *addr)
+{
+        addr[bit / BITMAP_WORDBITS] |= (1ul << (bit % BITMAP_WORDBITS));
+}
+
 
 static inline int
 copy_from_user(void *dst, void *src, size_t size)
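
For reference, here is a small standalone illustration (not part of the patch;
the constants and the main() harness are made up, and the helpers are restated
inline so the example is self-contained) of how the bitmap shim above sizes
and manipulates a branch_targets array, mirroring the kernel's unsigned
long-based bitmap interface:

        #include <assert.h>
        #include <stdbool.h>
        #include <stdlib.h>

        #define BITMAP_WORDBITS (sizeof(unsigned long) * 8)
        #define BITS_TO_LONGS(bits) (((bits) + BITMAP_WORDBITS - 1) / BITMAP_WORDBITS)

        static inline bool
        test_bit(unsigned int bit, unsigned long *addr)
        {
                return addr[bit / BITMAP_WORDBITS] & (1ul << (bit % BITMAP_WORDBITS));
        }

        static inline void
        set_bit(unsigned int bit, unsigned long *addr)
        {
                addr[bit / BITMAP_WORDBITS] |= 1ul << (bit % BITMAP_WORDBITS);
        }

        int main(void)
        {
                unsigned int max_ip = 100;      /* made-up shader length in instructions */
                unsigned long *branch_targets =
                        calloc(BITS_TO_LONGS(max_ip), sizeof(unsigned long));

                if (!branch_targets)
                        return 1;

                set_bit(14, branch_targets);    /* fall-through after a branch */
                set_bit(16, branch_targets);    /* taken branch target */

                assert(test_bit(14, branch_targets));
                assert(test_bit(16, branch_targets));
                assert(!test_bit(15, branch_targets));

                free(branch_targets);
                return 0;
        }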