anv: Implement VK_KHR_draw_indirect_count for gen 7+
author Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Fri, 5 Oct 2018 09:15:24 +0000 (12:15 +0300)
committer Jason Ekstrand <jason@jlekstrand.net>
Fri, 18 Jan 2019 18:31:44 +0000 (18:31 +0000)
v2: by Jason Ekstrand
  - Move the population of registers that don't change inside
    the draw loop out of the loop.
  - Remove the dependency on ALU registers.
  - Clarify the usage of PIPE_CONTROL.
  - Without the use of ALU registers the patch works on gen7+.

v3: set pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES

Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/intel/vulkan/anv_extensions.py
src/intel/vulkan/genX_cmd_buffer.c

index 2d212361955c092034b76b2fa9ee76c8f3f386b0..79aef1328408ea8a8b55a0a617bef60cf4218d27 100644
@@ -81,6 +81,7 @@ EXTENSIONS = [
     Extension('VK_KHR_device_group',                      1, True),
     Extension('VK_KHR_device_group_creation',             1, True),
     Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+    Extension('VK_KHR_draw_indirect_count',               1, True),
     Extension('VK_KHR_driver_properties',                 1, True),
     Extension('VK_KHR_external_fence',                    1,
               'device->has_syncobj_wait'),
index 6fb19661ebba5d0d39409dcaeb8ec1fb45173190..6c45acda6f52cd16ac62bac3261f84caa35df6da 100644
@@ -3018,6 +3018,153 @@ void genX(CmdDrawIndexedIndirect)(
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
 }
 
+static void
+prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
+                                 struct anv_address count_address)
+{
+   /* Upload the current draw count from the count buffer to
+    * MI_PREDICATE_SRC0.
+    */
+   emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+
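+   /* Zero the upper DWord of MI_PREDICATE_SRC1; the per-draw index loaded
+    * into the lower DWord later is only 32 bits wide.
+    */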
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+}
+
+static void
+emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
+                          uint32_t draw_index)
+{
+   /* Upload the index of the current primitive to MI_PREDICATE_SRC1. */
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index);
+
+   if (draw_index == 0) {
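+      /* Seed the predicate for the first draw: LOADINV of the comparison
+       * (draw_count == 0) leaves the predicate TRUE exactly when
+       * draw_count != 0, so draw 0 only executes if at least one draw was
+       * requested.
+       */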
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+         mip.LoadOperation    = LOAD_LOADINV;
+         mip.CombineOperation = COMBINE_SET;
+         mip.CompareOperation = COMPARE_SRCS_EQUAL;
+      }
+   } else {
+      /* While draw_index < draw_count the comparison is FALSE and the
+       * predicate's result stays
+       *  FALSE ^ TRUE = TRUE
+       * Once draw_index == draw_count the result becomes
+       *  TRUE ^ TRUE = FALSE
+       * and every subsequent draw keeps it at
+       *  FALSE ^ FALSE = FALSE
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+         mip.LoadOperation    = LOAD_LOAD;
+         mip.CombineOperation = COMBINE_XOR;
+         mip.CompareOperation = COMPARE_SRCS_EQUAL;
+      }
+   }
+}
+
+void genX(CmdDrawIndirectCountKHR)(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    _buffer,
+    VkDeviceSize                                offset,
+    VkBuffer                                    _countBuffer,
+    VkDeviceSize                                countBufferOffset,
+    uint32_t                                    maxDrawCount,
+    uint32_t                                    stride)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+   ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+   if (anv_batch_has_error(&cmd_buffer->batch))
+      return;
+
+   genX(cmd_buffer_flush_state)(cmd_buffer);
+
+   struct anv_address count_address =
+      anv_address_add(count_buffer->address, countBufferOffset);
+
+   prepare_for_draw_count_predicate(cmd_buffer, count_address);
+
+   for (uint32_t i = 0; i < maxDrawCount; i++) {
+      struct anv_address draw = anv_address_add(buffer->address, offset);
+
+      emit_draw_count_predicate(cmd_buffer, i);
+
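+      /* firstVertex and firstInstance start at byte offset 8 of
+       * VkDrawIndirectCommand.
+       */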
+      if (vs_prog_data->uses_firstvertex ||
+          vs_prog_data->uses_baseinstance)
+         emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 8));
+      if (vs_prog_data->uses_drawid)
+         emit_draw_index(cmd_buffer, i);
+
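+      /* Load the draw parameters from the indirect buffer into the
+       * 3DPRIM_* registers consumed by the 3DPRIMITIVE below.
+       */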
+      load_indirect_parameters(cmd_buffer, draw, false);
+
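+      /* Emit a predicated 3DPRIMITIVE; the hardware drops the draw
+       * whenever the predicate computed above is FALSE.
+       */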
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+         prim.IndirectParameterEnable  = true;
+         prim.PredicateEnable          = true;
+         prim.VertexAccessType         = SEQUENTIAL;
+         prim.PrimitiveTopologyType    = pipeline->topology;
+      }
+
+      offset += stride;
+   }
+
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
+}
+
+void genX(CmdDrawIndexedIndirectCountKHR)(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    _buffer,
+    VkDeviceSize                                offset,
+    VkBuffer                                    _countBuffer,
+    VkDeviceSize                                countBufferOffset,
+    uint32_t                                    maxDrawCount,
+    uint32_t                                    stride)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+   ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
+   if (anv_batch_has_error(&cmd_buffer->batch))
+      return;
+
+   genX(cmd_buffer_flush_state)(cmd_buffer);
+
+   struct anv_address count_address =
+      anv_address_add(count_buffer->address, countBufferOffset);
+
+   prepare_for_draw_count_predicate(cmd_buffer, count_address);
+
+   for (uint32_t i = 0; i < maxDrawCount; i++) {
+      struct anv_address draw = anv_address_add(buffer->address, offset);
+
+      emit_draw_count_predicate(cmd_buffer, i);
+
+      /* TODO: We need to stomp base vertex to 0 somehow */
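+      /* vertexOffset and firstInstance start at byte offset 12 of
+       * VkDrawIndexedIndirectCommand.
+       */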
+      if (vs_prog_data->uses_firstvertex ||
+          vs_prog_data->uses_baseinstance)
+         emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 12));
+      if (vs_prog_data->uses_drawid)
+         emit_draw_index(cmd_buffer, i);
+
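+      /* Load the draw parameters from the indirect buffer into the
+       * 3DPRIM_* registers consumed by the 3DPRIMITIVE below.
+       */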
+      load_indirect_parameters(cmd_buffer, draw, true);
+
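+      /* Emit a predicated 3DPRIMITIVE; the hardware drops the draw
+       * whenever the predicate computed above is FALSE.
+       */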
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+         prim.IndirectParameterEnable  = true;
+         prim.PredicateEnable          = true;
+         prim.VertexAccessType         = RANDOM;
+         prim.PrimitiveTopologyType    = pipeline->topology;
+      }
+
+      offset += stride;
+   }
+
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
+}
+
 static VkResult
 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
 {