anv: add a new execution mode for secondary command buffers
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Sat, 4 Apr 2020 10:22:24 +0000 (13:22 +0300)
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 20 May 2020 11:02:27 +0000 (14:02 +0300)
This change adds a call/return execution mode for secondary command
buffer rather than the existing copy into the primary batch mode.

v2: Rework convention to avoid burning an ALU register (Jason)

v3: Use anv_address_add() (Jason)

v4: Move command emissions to anv_batch_chain.c (Jason)

v5: Also move last MI_BBS emission in secondary command buffer to
    anv_batch_chain.c (Jason)

v6: Fix end secondary command buffer end (Jason)

v7: Refactor anv_batch_address() to remove additional emit functions

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2775>

src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_private.h

index be131d61c165a35726d816096972d1da99a3d486..2c3b7b3cad1f21d1d4af4446bd7d53a5141a94f4 100644 (file)
@@ -30,6 +30,7 @@
 #include "anv_private.h"
 
 #include "genxml/gen8_pack.h"
+#include "genxml/genX_bits.h"
 
 #include "util/debug.h"
 
@@ -288,6 +289,17 @@ anv_batch_emit_reloc(struct anv_batch *batch,
    return address_u64;
 }
 
+struct anv_address
+anv_batch_address(struct anv_batch *batch, void *batch_location)
+{
+   assert(batch->start < batch_location);
+
+   /* Allow a jump at the current location of the batch. */
+   assert(batch->next >= batch_location);
+
+   return anv_address_add(batch->start_addr, batch_location - batch->start);
+}
+
 void
 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
 {
@@ -396,6 +408,7 @@ static void
 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                    size_t batch_padding)
 {
+   batch->start_addr = (struct anv_address) { .bo = bbo->bo, };
    batch->next = batch->start = bbo->bo->map;
    batch->end = bbo->bo->map + bbo->bo->size - batch_padding;
    batch->relocs = &bbo->relocs;
@@ -406,6 +419,7 @@ static void
 anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
                       size_t batch_padding)
 {
+   batch->start_addr = (struct anv_address) { .bo = bbo->bo, };
    batch->start = bbo->bo->map;
    batch->next = bbo->bo->map + bbo->length;
    batch->end = bbo->bo->map + bbo->bo->size - batch_padding;
@@ -914,6 +928,29 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
       const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
       if (!cmd_buffer->device->can_chain_batches) {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
+      } else if (cmd_buffer->device->physical->use_softpin) {
+         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
+         /* If the secondary command buffer begins & ends in the same BO and
+          * its length is less than the length of CS prefetch, add some NOOPs
+          * instructions so the last MI_BATCH_BUFFER_START is outside the CS
+          * prefetch.
+          */
+         if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
+            int32_t batch_len =
+               cmd_buffer->batch.next - cmd_buffer->batch.start;
+
+            for (int32_t i = 0; i < (512 - batch_len); i += 4)
+               anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop);
+         }
+
+         void *jump_addr =
+            anv_batch_emitn(&cmd_buffer->batch,
+                            GEN8_MI_BATCH_BUFFER_START_length,
+                            GEN8_MI_BATCH_BUFFER_START,
+                            .AddressSpaceIndicator = ASI_PPGTT,
+                            .SecondLevelBatchBuffer = Firstlevelbatch) +
+            (GEN8_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
+         cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);
       } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
                  (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
          /* If the secondary has exactly one batch buffer in its list *and*
@@ -1023,6 +1060,26 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                             GEN8_MI_BATCH_BUFFER_START_length * 4);
       break;
    }
+   case ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN: {
+      struct anv_batch_bo *first_bbo =
+         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
+
+      uint64_t *write_return_addr =
+         anv_batch_emitn(&primary->batch,
+                         GEN8_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
+                         GEN8_MI_STORE_DATA_IMM,
+                         .Address = secondary->return_addr)
+         + (GEN8_MI_STORE_DATA_IMM_ImmediateData_start / 8);
+
+      emit_batch_buffer_start(primary, first_bbo->bo, 0);
+
+      *write_return_addr =
+         anv_address_physical(anv_batch_address(&primary->batch,
+                                                primary->batch.next));
+
+      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
+      break;
+   }
    default:
       assert(!"Invalid execution mode");
    }
index 28c2aa5633a9be9f8b7d382cfd86eeeea5ef02f7..3a0563ae83cc079d9dfc63453edbf6a085325428 100644 (file)
@@ -1592,9 +1592,16 @@ struct anv_batch_bo {
    struct anv_reloc_list                        relocs;
 };
 
+struct anv_address {
+   struct anv_bo *bo;
+   uint32_t offset;
+};
+
 struct anv_batch {
    const VkAllocationCallbacks *                alloc;
 
+   struct anv_address                           start_addr;
+
    void *                                       start;
    void *                                       end;
    void *                                       next;
@@ -1621,6 +1628,7 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
 uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
                               void *location, struct anv_bo *bo, uint32_t offset);
+struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
 
 static inline VkResult
 anv_batch_set_error(struct anv_batch *batch, VkResult error)
@@ -1637,11 +1645,6 @@ anv_batch_has_error(struct anv_batch *batch)
    return batch->status != VK_SUCCESS;
 }
 
-struct anv_address {
-   struct anv_bo *bo;
-   uint32_t offset;
-};
-
 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
 
 static inline bool
@@ -2824,6 +2827,7 @@ enum anv_cmd_buffer_exec_mode {
    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
+   ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
 };
 
 struct anv_cmd_buffer {
@@ -2873,6 +2877,8 @@ struct anv_cmd_buffer {
 
    struct anv_cmd_state                         state;
 
+   struct anv_address                           return_addr;
+
    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
    uint64_t                                     intel_perf_marker;
 };