anv: Move push constant allocation to the command buffer
authorJason Ekstrand <jason.ekstrand@intel.com>
Wed, 9 Mar 2016 02:10:22 +0000 (18:10 -0800)
committerJason Ekstrand <jason.ekstrand@intel.com>
Fri, 27 May 2016 22:17:43 +0000 (15:17 -0700)
Instead of blasting it out as part of the pipeline, we put it in the
command buffer and only blast it out when it's really needed.  Since the
PUSH_CONSTANT_ALLOC commands aren't pipelined, they immediately cause a
stall which we would like to avoid.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_pipeline_util.h

index bba24e82337e853fda0d71494bb64a7111cc1660..db7793ee462d6e24bcdc61edcd68c0dd86b3fbf4 100644 (file)
@@ -130,6 +130,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
    state->descriptors_dirty = 0;
    state->push_constants_dirty = 0;
    state->pipeline = NULL;
+   state->push_constant_stages = 0;
    state->restart_index = UINT32_MAX;
    state->dynamic = default_dynamic_state;
    state->need_query_wa = true;
index d63e50e228b5bad116b2785002aa663ac4865df6..789bc1a4f1ee48619c13fe704f1559d20571d186 100644 (file)
@@ -940,28 +940,6 @@ anv_compute_urb_partition(struct anv_pipeline *pipeline)
    pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks;
    pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1;
    pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0;
-
-   const unsigned stages =
-      _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS);
-   unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0;
-   unsigned used_kb = 0;
-
-   /* Broadwell+ and Haswell gt3 require that the push constant sizes be in
-    * units of 2KB.  Incidentally, these are the same platforms that have
-    * 32KB worth of push constant space.
-    */
-   if (push_constant_kb == 32)
-      size_per_stage &= ~1u;
-
-   for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
-      pipeline->urb.push_size[i] =
-         (pipeline->active_stages & (1 << i)) ? size_per_stage : 0;
-      used_kb += pipeline->urb.push_size[i];
-      assert(used_kb <= push_constant_kb);
-   }
-
-   pipeline->urb.push_size[MESA_SHADER_FRAGMENT] =
-      push_constant_kb - used_kb;
 }
 
 static void
index 6ac0745b66574b256d9b1aac7e2da09a976cb237..d8fffc3bf6d9000fc6559171845f96c24f32909a 100644 (file)
@@ -1177,6 +1177,7 @@ struct anv_cmd_state {
    uint32_t                                     restart_index;
    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
    struct anv_descriptor_set *                  descriptors[MAX_SETS];
+   VkShaderStageFlags                           push_constant_stages;
    struct anv_push_constants *                  push_constants[MESA_SHADER_STAGES];
    struct anv_state                             binding_tables[MESA_SHADER_STAGES];
    struct anv_state                             samplers[MESA_SHADER_STAGES];
@@ -1411,7 +1412,6 @@ struct anv_pipeline {
    uint32_t                                     scratch_start[MESA_SHADER_STAGES];
    uint32_t                                     total_scratch;
    struct {
-      uint8_t                                   push_size[MESA_SHADER_FRAGMENT + 1];
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  entries[MESA_SHADER_GEOMETRY + 1];
index ee47c2926e0579c00346a16d7eecc6b1b84c4081..f5e3530b59d5ac04c113be917b85485bff2b28d6 100644 (file)
@@ -274,6 +274,72 @@ void genX(CmdPipelineBarrier)(
    }
 }
 
+static void
+cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
+{
+   VkShaderStageFlags stages = cmd_buffer->state.pipeline->active_stages;
+
+   /* In order to avoid thrash, we assume that vertex and fragment stages
+    * always exist.  In the rare case where one is missing *and* the other
+    * uses push concstants, this may be suboptimal.  However, avoiding stalls
+    * seems more important.
+    */
+   stages |= VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT;
+
+   if (stages == cmd_buffer->state.push_constant_stages)
+      return;
+
+#if GEN_GEN >= 8
+   const unsigned push_constant_kb = 32;
+#elif GEN_IS_HASWELL
+   const unsigned push_constant_kb = cmd_buffer->device->info.gt == 3 ? 32 : 16;
+#else
+   const unsigned push_constant_kb = 16;
+#endif
+
+   const unsigned num_stages =
+      _mesa_bitcount(stages & VK_SHADER_STAGE_ALL_GRAPHICS);
+   unsigned size_per_stage = push_constant_kb / num_stages;
+
+   /* Broadwell+ and Haswell gt3 require that the push constant sizes be in
+    * units of 2KB.  Incidentally, these are the same platforms that have
+    * 32KB worth of push constant space.
+    */
+   if (push_constant_kb == 32)
+      size_per_stage &= ~1u;
+
+   uint32_t kb_used = 0;
+   for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
+      unsigned push_size = (stages & (1 << i)) ? size_per_stage : 0;
+      anv_batch_emit(&cmd_buffer->batch,
+                     GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
+         alloc._3DCommandSubOpcode  = 18 + i;
+         alloc.ConstantBufferOffset = (push_size > 0) ? kb_used : 0;
+         alloc.ConstantBufferSize   = push_size;
+      }
+      kb_used += push_size;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) {
+      alloc.ConstantBufferOffset = kb_used;
+      alloc.ConstantBufferSize = push_constant_kb - kb_used;
+   }
+
+   cmd_buffer->state.push_constant_stages = stages;
+
+   /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:
+    *
+    *    "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to
+    *    the next 3DPRIMITIVE command after programming the
+    *    3DSTATE_PUSH_CONSTANT_ALLOC_VS"
+    *
+    * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of
+    * pipeline setup, we need to dirty push constants.
+    */
+   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
+}
+
 static uint32_t
 cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -384,16 +450,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
 
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
 
-      /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:
-       *
-       *    "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to
-       *    the next 3DPRIMITIVE command after programming the
-       *    3DSTATE_PUSH_CONSTANT_ALLOC_VS"
-       *
-       * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of
-       * pipeline setup, we need to dirty push constants.
+      /* If the pipeline changed, we may need to re-allocate push constant
+       * space in the URB.
        */
-      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
+      cmd_buffer_alloc_push_constants(cmd_buffer);
    }
 
 #if GEN_GEN <= 7
index ecbe436e4ea2070bfed5223a1b6442446c0bb7c4..2ed55e016dc15c5749ca5e0d89d5317fdc98a7aa 100644 (file)
@@ -205,18 +205,6 @@ emit_urb_setup(struct anv_pipeline *pipeline)
    }
 #endif
 
-   unsigned push_start = 0;
-   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
-      unsigned push_size = pipeline->urb.push_size[i];
-      anv_batch_emit(&pipeline->batch,
-                     GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
-         alloc._3DCommandSubOpcode  = 18 + i;
-         alloc.ConstantBufferOffset = (push_size > 0) ? push_start : 0;
-         alloc.ConstantBufferSize   = push_size;
-      }
-      push_start += pipeline->urb.push_size[i];
-   }
-
    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), urb) {
          urb._3DCommandSubOpcode       = 48 + i;