anv: Move gen8+ push constant packet workaround.
author Rafael Antognolli <rafael.antognolli@intel.com>
Mon, 2 Dec 2019 21:41:32 +0000 (13:41 -0800)
committer Rafael Antognolli <rafael.antognolli@intel.com>
Wed, 4 Dec 2019 20:48:25 +0000 (20:48 +0000)
Store push_ranges in ascending order, and only "shift" them to the end
of the array during state packet emission.

We don't need this workaround with the new 3DSTATE_CONSTANT_ALL packet.
So instead of applying the workaround here just for GEN < 12 (which
requires an extra loop through all the ranges to figure out if we
should shift them or not), we simply move the whole logic to the state
emission code. At that point, in a later commit, we are already looping
through all of the ranges anyway to check which packet we will be using,
so we might as well implement the workaround there, where it is going to
be used.

v3: Move gen8+ workaround to the state emission code (Caio).
v4: Add explanation of why we moved the workaround (Caio).

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
src/intel/vulkan/anv_nir_compute_push_layout.c
src/intel/vulkan/genX_cmd_buffer.c

index 0b696fbc9e77db90af7a37c43dca92558544cfa2..3a6e393672de2370d565ecbe1a087c6a5b985844 100644 (file)
@@ -131,20 +131,12 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
       }
       assert(total_push_regs <= 64);
 
-      /* The Skylake PRM contains the following restriction:
-       *
-       *    "The driver must ensure The following case does not occur
-       *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
-       *     buffer 3 read length equal to zero committed followed by a
-       *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
-       *     zero committed."
-       *
-       * To avoid this, we program the buffers in the highest slots.
-       * This way, slot 0 is only used if slot 3 is also used.
-       */
-      int n = 3;
+      int n = 0;
 
-      for (int i = 3; i >= 0; i--) {
+      if (push_constant_range.length > 0)
+         map->push_ranges[n++] = push_constant_range;
+
+      for (int i = 0; i < 4; i++) {
          const struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
          if (ubo_range->length == 0)
             continue;
@@ -152,7 +144,7 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
          const struct anv_pipeline_binding *binding =
             &map->surface_to_descriptor[ubo_range->block];
 
-         map->push_ranges[n--] = (struct anv_push_range) {
+         map->push_ranges[n++] = (struct anv_push_range) {
             .set = binding->set,
             .index = binding->index,
             .dynamic_offset_index = binding->dynamic_offset_index,
@@ -160,9 +152,6 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
             .length = ubo_range->length,
          };
       }
-
-      if (push_constant_range.length > 0)
-         map->push_ranges[n--] = push_constant_range;
    } else {
       /* For Ivy Bridge, the push constants packets have a different
        * rule that would require us to iterate in the other direction
index 67500f91cdf97eb9d62b99c8c2f94e731837cf11..bceae33cfbcf4ef431ec2516e7cbcf5e94eede6e 100644 (file)
@@ -2539,10 +2539,31 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                &pipeline->shaders[stage]->bind_map;
 
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
+            unsigned buffer_count = 0;
             for (unsigned i = 0; i < 4; i++) {
                const struct anv_push_range *range = &bind_map->push_ranges[i];
-               if (range->length == 0)
-                  continue;
+               if (range->length > 0)
+                  buffer_count++;
+            }
+
+            /* The Skylake PRM contains the following restriction:
+             *
+             *    "The driver must ensure The following case does not occur
+             *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+             *     buffer 3 read length equal to zero committed followed by a
+             *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+             *     zero committed."
+             *
+             * To avoid this, we program the buffers in the highest slots.
+             * This way, slot 0 is only used if slot 3 is also used.
+             */
+            assert(buffer_count <= 4);
+            const unsigned shift = 4 - buffer_count;
+            for (unsigned i = 0; i < buffer_count; i++) {
+               const struct anv_push_range *range = &bind_map->push_ranges[i];
+
+               /* At this point we only have non-empty ranges */
+               assert(range->length > 0);
 
                struct anv_address addr;
                switch (range->set) {
@@ -2588,8 +2609,8 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                }
                }
 
-               c.ConstantBody.ReadLength[i] = range->length;
-               c.ConstantBody.Buffer[i] =
+               c.ConstantBody.ReadLength[i + shift] = range->length;
+               c.ConstantBody.Buffer[i + shift] =
                   anv_address_add(addr, range->start * 32);
             }
 #else