vc4: Add simulator kernel validation for multithreaded fragment shaders.

author Jonas Pfeil <pfeiljonas@gmx.de>

Fri, 11 Nov 2016 01:52:03 +0000 (17:52 -0800)

committer Eric Anholt <eric@anholt.net>

Sun, 13 Nov 2016 03:21:46 +0000 (19:21 -0800)
author Jonas Pfeil <pfeiljonas@gmx.de>
Fri, 11 Nov 2016 01:52:03 +0000 (17:52 -0800)
committer Eric Anholt <eric@anholt.net>
Sun, 13 Nov 2016 03:21:46 +0000 (19:21 -0800)
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h

index 90f45397d83110fa21324544bd63e442388e2d2f..8f5ed00d96ff61d0ea5a814c41d15070416a58c6 100644 (file)
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -150,6 +150,8 @@ struct vc4_validated_shader_info
  
         uint32_t num_uniform_addr_offsets;
         uint32_t *uniform_addr_offsets;
+
+       bool is_threaded;
  };
  
  /* vc4_validate.c */
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c

index a9dce1fa3796c79d6cebb002702fdcb36aa09b5f..bd193b993b321e460760e5da6a66ddd20d89e1a8 100644 (file)
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -780,11 +780,6 @@ validate_gl_shader_rec(struct drm_device *dev,
         exec->shader_rec_v += roundup(packet_size, 16);
         exec->shader_rec_size -= packet_size;
  
-       if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
-               DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
-               return -EINVAL;
-       }
-
         for (i = 0; i < shader_reloc_count; i++) {
                 if (src_handles[i] > exec->bo_count) {
                         DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
@@ -801,6 +796,18 @@ validate_gl_shader_rec(struct drm_device *dev,
                         return -EINVAL;
         }
  
+       if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
+           to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
+               DRM_ERROR("Thread mode of CL and FS do not match\n");
+               return -EINVAL;
+       }
+
+       if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
+           to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
+               DRM_ERROR("cs and vs cannot be threaded\n");
+               return -EINVAL;
+       }
+
         for (i = 0; i < shader_reloc_count; i++) {
                 struct vc4_validated_shader_info *validated_shader;
                 uint32_t o = shader_reloc_offsets[i];
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c

index 0ff3d01f3f2459ac235c15f26f6aa75fc25cb69e..d93f5239d7d33b6b469441216c4bed7139ea5a8d 100644 (file)
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -84,6 +84,14 @@ struct vc4_shader_validation_state {
          * basic blocks.
          */
         bool needs_uniform_address_for_loop;
+
+       /* Set when we find an instruction which violates the criteria for a
+        * threaded shader. These are:
+        *      - only read or write the lower half of the register space
+        *      - last thread switch signaled at the end
+        * So track the usage of the thread switches and the register usage.
+        */
+       bool all_registers_used;
  };
  
  static uint32_t
@@ -119,6 +127,12 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
                 return ~0;
  }
  
+static bool live_reg_is_upper_half(uint32_t lri)
+{
+       return  (lri >=16 && lri < 32) ||
+               (lri >=32 + 16 && lri < 32 + 32);
+}
+
  static bool
  is_tmu_submit(uint32_t waddr)
  {
@@ -385,6 +399,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
                 } else {
                         validation_state->live_immediates[lri] = ~0;
                 }
+
+               if (live_reg_is_upper_half(lri))
+                       validation_state->all_registers_used = true;
         }
  
         switch (waddr) {
@@ -593,6 +610,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
                 }
         }
  
+       if ((raddr_a >= 16 && raddr_a < 32) ||
+           (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
+               validation_state->all_registers_used = true;
+       }
+
         return true;
  }
  
@@ -748,6 +770,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
  {
         bool found_shader_end = false;
         int shader_end_ip = 0;
+       uint32_t last_thread_switch_ip = -3;
         uint32_t ip;
         struct vc4_validated_shader_info *validated_shader = NULL;
         struct vc4_shader_validation_state validation_state;
@@ -780,6 +803,16 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                 if (!vc4_handle_branch_target(&validation_state))
                         goto fail;
  
+               if (ip == last_thread_switch_ip + 3) {
+                       /* Reset r0-r3 live clamp data */
+                       int i;
+                       for (i = 64; i < LIVE_REG_COUNT; i++) {
+                               validation_state.live_min_clamp_offsets[i] = ~0;
+                               validation_state.live_max_clamp_regs[i] = false;
+                               validation_state.live_immediates[i] = ~0;
+                       }
+               }
+
                 switch (sig) {
                 case QPU_SIG_NONE:
                 case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -789,6 +822,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                 case QPU_SIG_LOAD_TMU1:
                 case QPU_SIG_PROG_END:
                 case QPU_SIG_SMALL_IMM:
+               case QPU_SIG_THREAD_SWITCH:
+               case QPU_SIG_LAST_THREAD_SWITCH:
                         if (!check_instruction_writes(validated_shader,
                                                       &validation_state)) {
                                 DRM_ERROR("Bad write at ip %d\n", ip);
@@ -804,6 +839,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                                 shader_end_ip = ip;
                         }
  
+                       if (sig == QPU_SIG_THREAD_SWITCH ||
+                           sig == QPU_SIG_LAST_THREAD_SWITCH) {
+                               validated_shader->is_threaded = true;
+
+                               if (ip < last_thread_switch_ip + 3) {
+                                       DRM_ERROR("Thread switch too soon after "
+                                                 "last switch at ip %d\n", ip);
+                                       goto fail;
+                               }
+                               last_thread_switch_ip = ip;
+                       }
+
                         break;
  
                 case QPU_SIG_LOAD_IMM:
@@ -818,6 +865,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                         if (!check_branch(inst, validated_shader,
                                           &validation_state, ip))
                                 goto fail;
+
+                       if (ip < last_thread_switch_ip + 3) {
+                               DRM_ERROR("Branch in thread switch at ip %d",
+                                         ip);
+                               goto fail;
+                       }
+
                         break;
                 default:
                         DRM_ERROR("Unsupported QPU signal %d at "
@@ -839,6 +893,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                 goto fail;
         }
  
+       /* Might corrupt other thread */
+       if (validated_shader->is_threaded &&
+           validation_state.all_registers_used) {
+               DRM_ERROR("Shader uses threading, but uses the upper "
+                         "half of the registers, too\n");
+               goto fail;
+       }
+
         /* If we did a backwards branch and we haven't emitted a uniforms
          * reset since then, we still need the uniforms stream to have the
          * uniforms address available so that the backwards branch can do its
author	Jonas Pfeil <pfeiljonas@gmx.de>
	Fri, 11 Nov 2016 01:52:03 +0000 (17:52 -0800)
committer	Eric Anholt <eric@anholt.net>
	Sun, 13 Nov 2016 03:21:46 +0000 (19:21 -0800)
src/gallium/drivers/vc4/kernel/vc4_drv.h		patch | blob | history
src/gallium/drivers/vc4/kernel/vc4_validate.c		patch | blob | history
src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c		patch | blob | history