intel/tools: Disassemble WAIT's argument as a destination
[mesa.git] / src / intel / vulkan / anv_batch_chain.c
index f820a69ceec5851ecd0f0f230392946d5a3c4587..ce429ff2d85536da878066f01b1552779f70087e 100644 (file)
@@ -409,9 +409,8 @@ static void
 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                    size_t batch_padding)
 {
-   batch->start_addr = (struct anv_address) { .bo = bbo->bo, };
-   batch->next = batch->start = bbo->bo->map;
-   batch->end = bbo->bo->map + bbo->bo->size - batch_padding;
+   anv_batch_set_storage(batch, (struct anv_address) { .bo = bbo->bo, },
+                         bbo->bo->map, bbo->bo->size - batch_padding);
    batch->relocs = &bbo->relocs;
    anv_reloc_list_clear(&bbo->relocs);
 }
@@ -929,7 +928,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
       const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
       if (!cmd_buffer->device->can_chain_batches) {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
-      } else if (cmd_buffer->device->physical->use_softpin) {
+      } else if (cmd_buffer->device->physical->use_call_secondary) {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
          /* If the secondary command buffer begins & ends in the same BO and
           * its length is less than the length of CS prefetch, add some NOOPs
@@ -952,6 +951,11 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
                             .SecondLevelBatchBuffer = Firstlevelbatch) +
             (GEN8_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
          cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);
+
+         /* The emit above may have caused us to chain batch buffers which
+          * would mean that batch_bo is no longer valid.
+          */
+         batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
       } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
                  (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
          /* If the secondary has exactly one batch buffer in its list *and*
@@ -1092,6 +1096,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
 struct anv_execbuf {
    struct drm_i915_gem_execbuffer2           execbuf;
 
+   struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
    struct drm_i915_gem_exec_object2 *        objects;
    uint32_t                                  bo_count;
    struct anv_bo **                          bos;
@@ -1120,6 +1126,24 @@ anv_execbuf_finish(struct anv_execbuf *exec)
    vk_free(exec->alloc, exec->bos);
 }
 
+static void
+anv_execbuf_add_ext(struct anv_execbuf *exec,
+                    uint32_t ext_name,
+                    struct i915_user_extension *ext)
+{
+   __u64 *iter = &exec->execbuf.cliprects_ptr;
+   /* Append ext, tagged with ext_name, to the tail of the extension chain
+    * whose head is stored in execbuf.cliprects_ptr (where iter starts), and
+    * flag the execbuf as using extensions.
+    */
+   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
+   /* Follow the next_extension links until iter points at the first zero
+    * link: the head itself when the chain is empty, otherwise the
+    * next_extension field of the last element.
+    */
+   while (*iter != 0) {
+      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
+   }
+   /* NOTE(review): only ext->name is written here; ext->next_extension is
+    * left untouched, so the chain terminates correctly only if the caller
+    * hands in ext with next_extension already 0 -- confirm at call sites.
+    */
+   ext->name = ext_name;
+   /* Store ext's address in the terminating link, making it the new tail. */
+   *iter = (uintptr_t) ext;
+}
+
 static VkResult
 anv_execbuf_add_bo_bitset(struct anv_device *device,
                           struct anv_execbuf *exec,
@@ -1378,9 +1402,6 @@ static bool
 relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_execbuf *exec)
 {
-   if (cmd_buffer->perf_query_pool)
-      return false;
-
    if (!exec->has_relocs)
       return true;
 
@@ -1637,7 +1658,7 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
       .buffer_count = execbuf->bo_count,
       .batch_start_offset = 0,
       .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
-      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
+      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
       .rsvd1 = device->context_id,
       .rsvd2 = 0,
    };
@@ -1680,7 +1701,13 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
    execbuf.alloc_scope = submit->alloc_scope;
    execbuf.perf_query_pass = submit->perf_query_pass;
 
-   VkResult result;
+   /* Always add the workaround BO as it includes a driver identifier for the
+    * error_state.
+    */
+   VkResult result =
+      anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
+   if (result != VK_SUCCESS)
+      goto error;
 
    for (uint32_t i = 0; i < submit->fence_bo_count; i++) {
       int signaled;
@@ -1704,7 +1731,7 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
          .buffer_count = execbuf.bo_count,
          .batch_start_offset = 0,
          .batch_len = submit->simple_bo_size,
-         .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
+         .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
          .rsvd1 = device->context_id,
          .rsvd2 = 0,
       };
@@ -1752,18 +1779,30 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
 
    if (submit->fence_count > 0) {
       assert(device->physical->has_syncobj);
-      execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.execbuf.num_cliprects = submit->fence_count;
-      execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      if (device->has_thread_submit) {
+         execbuf.timeline_fences.fence_count = submit->fence_count;
+         execbuf.timeline_fences.handles_ptr = (uintptr_t)submit->fences;
+         execbuf.timeline_fences.values_ptr = (uintptr_t)submit->fence_values;
+         anv_execbuf_add_ext(&execbuf,
+                             DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+                             &execbuf.timeline_fences.base);
+      } else {
+         execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
+         execbuf.execbuf.num_cliprects = submit->fence_count;
+         execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      }
    }
 
    if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
       execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence;
    }
 
-   if (submit->need_out_fence)
+   if (submit->need_out_fence) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
+   }
 
    if (has_perf_query) {
       struct anv_query_pool *query_pool = submit->cmd_buffer->perf_query_pool;
@@ -1774,8 +1813,9 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
       /* Some performance queries just use the pipeline statistic HW, no need for
        * OA in that case, so no need to reconfigure.
        */
-      if (query_info->kind == GEN_PERF_QUERY_TYPE_OA ||
-          query_info->kind == GEN_PERF_QUERY_TYPE_RAW) {
+      if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
+          (query_info->kind == GEN_PERF_QUERY_TYPE_OA ||
+           query_info->kind == GEN_PERF_QUERY_TYPE_RAW)) {
          int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                              (void *)(uintptr_t) query_info->oa_metrics_set_id);
          if (ret < 0) {