anv: Delay allocation of relocation lists
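
Relocation lists used to allocate their relocation arrays (256 entries up
front) and their dependency set at creation time, even when nothing was ever
recorded in them.  Start every anv_reloc_list empty instead:
anv_reloc_list_init only zeroes the struct, the relocs/reloc_bos arrays are
allocated on the first grow (now via vk_realloc, with a 256-entry floor), and
the deps set is created lazily on first use.  All consumers now tolerate NULL
arrays and a NULL deps set.
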
[mesa.git] / src/intel/vulkan/anv_batch_chain.c
index c47a81c8a4df1baf4644c6a0787f1fb49921c5a0..70c14d14f4984e8baf386e8f88997479e51a857b 100644
  * Functions related to anv_reloc_list
  *-----------------------------------------------------------------------*/
 
+VkResult
+anv_reloc_list_init(struct anv_reloc_list *list,
+                    const VkAllocationCallbacks *alloc)
+{
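+   /* Start with everything zeroed: the relocs/reloc_bos arrays are
+    * allocated on the first grow and the deps set on the first pinned-BO
+    * add, so a list that never records anything costs no allocations.
+    */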
+   memset(list, 0, sizeof(*list));
+   return VK_SUCCESS;
+}
+
 static VkResult
 anv_reloc_list_init_clone(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc,
                           const struct anv_reloc_list *other_list)
 {
-   if (other_list) {
-      list->num_relocs = other_list->num_relocs;
-      list->array_length = other_list->array_length;
-   } else {
-      list->num_relocs = 0;
-      list->array_length = 256;
-   }
-
-   list->relocs =
-      vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
-                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
-   if (list->relocs == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   list->reloc_bos =
-      vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
-                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
-   if (list->reloc_bos == NULL) {
-      vk_free(alloc, list->relocs);
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
-
-   list->deps = _mesa_set_create(NULL, _mesa_hash_pointer,
-                                 _mesa_key_pointer_equal);
+   list->num_relocs = other_list->num_relocs;
+   list->array_length = other_list->array_length;
+
+   if (list->num_relocs > 0) {
+      list->relocs =
+         vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (list->relocs == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   if (!list->deps) {
-      vk_free(alloc, list->relocs);
-      vk_free(alloc, list->reloc_bos);
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
+      list->reloc_bos =
+         vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (list->reloc_bos == NULL) {
+         vk_free(alloc, list->relocs);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
 
-   if (other_list) {
       memcpy(list->relocs, other_list->relocs,
              list->array_length * sizeof(*list->relocs));
       memcpy(list->reloc_bos, other_list->reloc_bos,
              list->array_length * sizeof(*list->reloc_bos));
-      struct set_entry *entry;
-      set_foreach(other_list->deps, entry) {
-         _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
+   } else {
+      list->relocs = NULL;
+      list->reloc_bos = NULL;
+   }
+
+   if (other_list->deps) {
+      list->deps = _mesa_set_clone(other_list->deps, NULL);
+      if (!list->deps) {
+         vk_free(alloc, list->relocs);
+         vk_free(alloc, list->reloc_bos);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
+   } else {
+      list->deps = NULL;
    }
 
    return VK_SUCCESS;
 }
 
-VkResult
-anv_reloc_list_init(struct anv_reloc_list *list,
-                    const VkAllocationCallbacks *alloc)
-{
-   return anv_reloc_list_init_clone(list, alloc, NULL);
-}
-
 void
 anv_reloc_list_finish(struct anv_reloc_list *list,
                       const VkAllocationCallbacks *alloc)
 {
    vk_free(alloc, list->relocs);
    vk_free(alloc, list->reloc_bos);
-   _mesa_set_destroy(list->deps, NULL);
+   if (list->deps != NULL)
+      _mesa_set_destroy(list->deps, NULL);
 }
 
 static VkResult
@@ -122,34 +118,27 @@ anv_reloc_list_grow(struct anv_reloc_list *list,
    if (list->num_relocs + num_additional_relocs <= list->array_length)
       return VK_SUCCESS;
 
-   size_t new_length = list->array_length * 2;
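+   /* Relocation lists now start empty (array_length == 0), so seed the
+    * doubling at the old initial size of 256 entries.
+    */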
+   size_t new_length = MAX2(256, list->array_length * 2);
    while (new_length < list->num_relocs + num_additional_relocs)
       new_length *= 2;
 
    struct drm_i915_gem_relocation_entry *new_relocs =
-      vk_alloc(alloc, new_length * sizeof(*list->relocs), 8,
-                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      vk_realloc(alloc, list->relocs,
+                 new_length * sizeof(*list->relocs), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (new_relocs == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   list->relocs = new_relocs;
 
    struct anv_bo **new_reloc_bos =
-      vk_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
-                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (new_reloc_bos == NULL) {
-      vk_free(alloc, new_relocs);
+      vk_realloc(alloc, list->reloc_bos,
+                 new_length * sizeof(*list->reloc_bos), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (new_reloc_bos == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
-
-   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
-   memcpy(new_reloc_bos, list->reloc_bos,
-          list->num_relocs * sizeof(*list->reloc_bos));
-
-   vk_free(alloc, list->relocs);
-   vk_free(alloc, list->reloc_bos);
+   list->reloc_bos = new_reloc_bos;
 
    list->array_length = new_length;
-   list->relocs = new_relocs;
-   list->reloc_bos = new_reloc_bos;
 
    return VK_SUCCESS;
 }
@@ -163,6 +152,11 @@ anv_reloc_list_add(struct anv_reloc_list *list,
    int index;
 
    if (target_bo->flags & EXEC_OBJECT_PINNED) {
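+      /* Pinned (softpin) BOs don't need relocation entries; track them in
+       * the lazily-created deps set instead.
+       */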
+      if (list->deps == NULL) {
+         list->deps = _mesa_pointer_set_create(NULL);
+         if (unlikely(list->deps == NULL))
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
       _mesa_set_add(list->deps, target_bo);
       return VK_SUCCESS;
    }
@@ -195,19 +189,26 @@ anv_reloc_list_append(struct anv_reloc_list *list,
    if (result != VK_SUCCESS)
       return result;
 
-   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
-          other->num_relocs * sizeof(other->relocs[0]));
-   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
-          other->num_relocs * sizeof(other->reloc_bos[0]));
+   if (other->num_relocs > 0) {
+      memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
+             other->num_relocs * sizeof(other->relocs[0]));
+      memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
+             other->num_relocs * sizeof(other->reloc_bos[0]));
 
-   for (uint32_t i = 0; i < other->num_relocs; i++)
-      list->relocs[i + list->num_relocs].offset += offset;
+      for (uint32_t i = 0; i < other->num_relocs; i++)
+         list->relocs[i + list->num_relocs].offset += offset;
 
-   list->num_relocs += other->num_relocs;
+      list->num_relocs += other->num_relocs;
+   }
 
-   struct set_entry *entry;
-   set_foreach(other->deps, entry) {
-      _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
+   if (other->deps) {
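+      /* Make sure we have a deps set of our own before merging theirs. */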
+      if (list->deps == NULL) {
+         list->deps = _mesa_pointer_set_create(NULL);
+         if (unlikely(list->deps == NULL))
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+      set_foreach(other->deps, entry)
+         _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
    }
 
    return VK_SUCCESS;
@@ -421,9 +422,9 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
                   struct anv_batch_bo *next_bbo,
                   uint32_t next_bbo_offset)
 {
-   MAYBE_UNUSED const uint32_t bb_start_offset =
+   const uint32_t bb_start_offset =
       prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4;
-   MAYBE_UNUSED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;
+   ASSERTED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;
 
    /* Make sure we're looking at a MI_BATCH_BUFFER_START */
    assert(((*bb_start >> 29) & 0x07) == 0);
@@ -503,7 +504,7 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
    return (struct anv_address) {
-      .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
+      .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
       .offset = bt_block->offset,
    };
 }
@@ -531,7 +532,7 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
    anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
       bbs.DWordLength               = cmd_buffer->device->info.gen < 8 ?
                                       gen7_length : gen8_length;
-      bbs._2ndLevelBatchBuffer      = _1stlevelbatch;
+      bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
       bbs.AddressSpaceIndicator     = ASI_PPGTT;
       bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
    }
@@ -681,8 +682,8 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return (struct anv_state) { 0 };
 
    state.offset = cmd_buffer->bt_next;
-   state.map = anv_binding_table_pool(device)->block_pool.map +
-      bt_block->offset + state.offset;
+   state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool,
+                                  bt_block->offset + state.offset);
 
    cmd_buffer->bt_next += state.alloc_size;
 
@@ -815,13 +816,13 @@ void
 anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
 {
    /* Delete all but the first batch bo */
-   assert(!list_empty(&cmd_buffer->batch_bos));
+   assert(!list_is_empty(&cmd_buffer->batch_bos));
    while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
       struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
       list_del(&bbo->link);
       anv_batch_bo_destroy(bbo, cmd_buffer);
    }
-   assert(!list_empty(&cmd_buffer->batch_bos));
+   assert(!list_is_empty(&cmd_buffer->batch_bos));
 
    anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
                       &cmd_buffer->batch,
@@ -894,8 +895,17 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
          * It doesn't matter where it points now so long as it has a valid
           * relocation.  We'll adjust it later as part of the chaining
           * process.
+          *
+          * We set the end of the batch a little short so we would be sure
+          * to have room for the chaining command.  Since we're about to
+          * emit the chaining command, let's set it back where it should go.
           */
+         cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
+         assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
+
          emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
       } else {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
       }
@@ -1029,6 +1039,12 @@ _compare_bo_handles(const void *_bo1, const void *_bo2)
    return (*bo1)->gem_handle - (*bo2)->gem_handle;
 }
 
+static VkResult
+anv_execbuf_add_bo_set(struct anv_execbuf *exec,
+                       struct set *deps,
+                       uint32_t extra_flags,
+                       const VkAllocationCallbacks *alloc);
+
 static VkResult
 anv_execbuf_add_bo(struct anv_execbuf *exec,
                    struct anv_bo *bo,
@@ -1088,7 +1104,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
       obj->relocs_ptr = 0;
       obj->alignment = 0;
       obj->offset = bo->offset;
-      obj->flags = bo->flags | extra_flags;
+      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
       obj->rsvd1 = 0;
       obj->rsvd2 = 0;
    }
@@ -1118,37 +1134,46 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
          }
       }
 
-      if (relocs->deps && relocs->deps->entries > 0) {
-         const uint32_t entries = relocs->deps->entries;
-         struct anv_bo **bos =
-            vk_alloc(alloc, entries * sizeof(*bos),
-                     8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-         if (bos == NULL)
-            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc);
+   }
 
-         struct set_entry *entry;
-         struct anv_bo **bo = bos;
-         set_foreach(relocs->deps, entry) {
-            *bo++ = (void *)entry->key;
-         }
+   return VK_SUCCESS;
+}
 
-         qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
+/* Add BO dependencies to execbuf */
+static VkResult
+anv_execbuf_add_bo_set(struct anv_execbuf *exec,
+                       struct set *deps,
+                       uint32_t extra_flags,
+                       const VkAllocationCallbacks *alloc)
+{
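+   /* The deps set is created lazily, so it may legitimately be NULL. */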
+   if (!deps || deps->entries <= 0)
+      return VK_SUCCESS;
 
-         VkResult result = VK_SUCCESS;
-         for (bo = bos; bo < bos + entries; bo++) {
-            result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
-            if (result != VK_SUCCESS)
-               break;
-         }
+   const uint32_t entries = deps->entries;
+   struct anv_bo **bos =
+      vk_alloc(alloc, entries * sizeof(*bos),
+               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (bos == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-         vk_free(alloc, bos);
+   struct anv_bo **bo = bos;
+   set_foreach(deps, entry) {
+      *bo++ = (void *)entry->key;
+   }
 
-         if (result != VK_SUCCESS)
-            return result;
-      }
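+   /* Sort by GEM handle so dependencies land in the validation list in a
+    * stable order.
+    */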
+   qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
+
+   VkResult result = VK_SUCCESS;
+   for (bo = bos; bo < bos + entries; bo++) {
+      result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
+      if (result != VK_SUCCESS)
+         break;
    }
 
-   return VK_SUCCESS;
+   vk_free(alloc, bos);
+
+   return result;
 }
 
 static VkResult
@@ -1222,7 +1247,7 @@ adjust_relocations_to_state_pool(struct anv_state_pool *pool,
     * relocations that point to the pool bo with the correct offset.
     */
    for (size_t i = 0; i < relocs->num_relocs; i++) {
-      if (relocs->reloc_bos[i] == &pool->block_pool.bo) {
+      if (relocs->reloc_bos[i] == pool->block_pool.bo) {
          /* Adjust the delta value in the relocation to correctly
           * correspond to the new delta.  Initially, this value may have
           * been negative (if treated as unsigned), but we trust in
@@ -1330,7 +1355,7 @@ relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
     * given time.  The only option is to always relocate them.
     */
    anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
-                        &cmd_buffer->device->surface_state_pool.block_pool.bo,
+                        cmd_buffer->device->surface_state_pool.block_pool.bo,
                         true /* always relocate surface states */);
 
    /* Since we own all of the batch buffers, we know what values are stored
@@ -1359,11 +1384,64 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
 
    adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
                                       cmd_buffer->last_ss_pool_center);
-   VkResult result = anv_execbuf_add_bo(execbuf, &ss_pool->block_pool.bo,
-                                        &cmd_buffer->surface_relocs, 0,
-                                        &cmd_buffer->device->alloc);
-   if (result != VK_SUCCESS)
-      return result;
+   VkResult result;
+   struct anv_bo *bo;
+   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
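+      /* With softpin, relocations are never processed, so nothing pulls
+       * BOs onto the validation list for us.  Add everything the GPU may
+       * touch explicitly: all state pool BOs, all memory objects, and the
+       * surface state dependencies.
+       */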
+      anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+      /* Add surface dependencies (BOs) to the execbuf */
+      result = anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps,
+                                      0, &cmd_buffer->device->alloc);
+      if (result != VK_SUCCESS)
+         return result;
+
+      /* Add the BOs for all memory objects */
+      list_for_each_entry(struct anv_device_memory, mem,
+                          &cmd_buffer->device->memory_objects, link) {
+         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      struct anv_block_pool *pool;
+      pool = &cmd_buffer->device->dynamic_state_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      pool = &cmd_buffer->device->instruction_state_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      pool = &cmd_buffer->device->binding_table_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   } else {
+      /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
+       * will get added automatically by processing relocations on the batch
+       * buffer.  We have to add the surface state BO manually because it has
+       * relocations of its own that we need to be sure are processed.
+       */
+      result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo,
+                                  &cmd_buffer->surface_relocs, 0,
+                                  &cmd_buffer->device->alloc);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    /* First, we walk over all of the bos we've seen and add them and their
     * relocations to the validate list.
@@ -1520,6 +1598,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
                        VkFence _fence)
 {
    ANV_FROM_HANDLE(anv_fence, fence, _fence);
+   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
 
    struct anv_execbuf execbuf;
    anv_execbuf_init(&execbuf);
@@ -1534,6 +1613,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
          result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                      0, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1541,6 +1621,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
          break;
 
       case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
          if (in_fence == -1) {
             in_fence = impl->fd;
          } else {
@@ -1590,6 +1671,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
          result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                      EXEC_OBJECT_WRITE, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1597,6 +1679,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
          break;
 
       case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
          need_out_fence = true;
          break;
 
@@ -1631,6 +1714,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!pdevice->has_syncobj_wait);
          result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
                                      EXEC_OBJECT_WRITE, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1650,10 +1734,20 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
       }
    }
 
-   if (cmd_buffer)
+   if (cmd_buffer) {
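+      /* With INTEL_DEBUG=bat, decode and dump the batch before submitting
+       * it to the kernel.
+       */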
+      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
+         struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos);
+
+         device->cmd_buffer_being_decoded = cmd_buffer;
+         gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
+                         (*bo)->bo.size, (*bo)->bo.offset, false);
+         device->cmd_buffer_being_decoded = NULL;
+      }
+
       result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
-   else
+   } else {
       result = setup_empty_execbuf(&execbuf, device);
+   }
 
    if (result != VK_SUCCESS)
       return result;
@@ -1694,6 +1788,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
    }
 
    if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      assert(!pdevice->has_syncobj_wait);
       /* BO fences can't be shared, so they can't be temporary. */
       assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
 
@@ -1711,6 +1806,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
    }
 
    if (result == VK_SUCCESS && need_out_fence) {
+      assert(!pdevice->has_syncobj_wait);
       int out_fence = execbuf.execbuf.rsvd2 >> 32;
       for (uint32_t i = 0; i < num_out_semaphores; i++) {
          ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);