anv+tu+radv: delete unusable dev_icd.json
[mesa.git] / src / intel / vulkan / anv_batch_chain.c
index 9d3884c8ac30f54a8a518e55b321c1c2d3a3a1ec..114eda0336f4b9bf8f236fe955bebfef932b650b 100644 (file)
@@ -75,8 +75,7 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
-   list->deps = _mesa_set_create(NULL, _mesa_hash_pointer,
-                                 _mesa_key_pointer_equal);
+   list->deps = _mesa_pointer_set_create(NULL);
 
    if (!list->deps) {
       vk_free(alloc, list->relocs);
@@ -89,7 +88,6 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
              list->array_length * sizeof(*list->relocs));
       memcpy(list->reloc_bos, other_list->reloc_bos,
              list->array_length * sizeof(*list->reloc_bos));
-      struct set_entry *entry;
       set_foreach(other_list->deps, entry) {
          _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
       }
@@ -205,7 +203,6 @@ anv_reloc_list_append(struct anv_reloc_list *list,
 
    list->num_relocs += other->num_relocs;
 
-   struct set_entry *entry;
    set_foreach(other->deps, entry) {
       _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
    }
@@ -429,14 +426,23 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
    assert(((*bb_start >> 29) & 0x07) == 0);
    assert(((*bb_start >> 23) & 0x3f) == 49);
 
-   uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
-   assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);
+   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
+      assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED);
+      assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED);
 
-   prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
-   prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;
+      write_reloc(cmd_buffer->device,
+                  prev_bbo->bo.map + bb_start_offset + 4,
+                  next_bbo->bo.offset + next_bbo_offset, true);
+   } else {
+      uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
+      assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);
+
+      prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
+      prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;
 
-   /* Use a bogus presumed offset to force a relocation */
-   prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1;
+      /* Use a bogus presumed offset to force a relocation */
+      prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1;
+   }
 }
 
 static void
@@ -494,7 +500,7 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
    return (struct anv_address) {
-      .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
+      .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
       .offset = bt_block->offset,
    };
 }
@@ -522,7 +528,7 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
    anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
       bbs.DWordLength               = cmd_buffer->device->info.gen < 8 ?
                                       gen7_length : gen8_length;
-      bbs._2ndLevelBatchBuffer      = _1stlevelbatch;
+      bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
       bbs.AddressSpaceIndicator     = ASI_PPGTT;
       bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
    }
@@ -672,8 +678,8 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return (struct anv_state) { 0 };
 
    state.offset = cmd_buffer->bt_next;
-   state.map = anv_binding_table_pool(device)->block_pool.map +
-      bt_block->offset + state.offset;
+   state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool,
+                                  bt_block->offset + state.offset);
 
    cmd_buffer->bt_next += state.alloc_size;
 
@@ -885,8 +891,17 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
           * It doesn't matter where it points now so long as has a valid
           * relocation.  We'll adjust it later as part of the chaining
           * process.
+          *
+          * We set the end of the batch a little short so we would be sure we
+          * have room for the chaining command.  Since we're about to emit the
+          * chaining command, let's set it back where it should go.
           */
+         cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
+         assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
+
          emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
       } else {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
       }
@@ -987,6 +1002,8 @@ struct anv_execbuf {
    /* Allocated length of the 'objects' and 'bos' arrays */
    uint32_t                                  array_length;
 
+   bool                                      has_relocs;
+
    uint32_t                                  fence_count;
    uint32_t                                  fence_array_length;
    struct drm_i915_gem_exec_fence *          fences;
@@ -1018,6 +1035,12 @@ _compare_bo_handles(const void *_bo1, const void *_bo2)
    return (*bo1)->gem_handle - (*bo2)->gem_handle;
 }
 
+static VkResult
+anv_execbuf_add_bo_set(struct anv_execbuf *exec,
+                       struct set *deps,
+                       uint32_t extra_flags,
+                       const VkAllocationCallbacks *alloc);
+
 static VkResult
 anv_execbuf_add_bo(struct anv_execbuf *exec,
                    struct anv_bo *bo,
@@ -1077,60 +1100,76 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
       obj->relocs_ptr = 0;
       obj->alignment = 0;
       obj->offset = bo->offset;
-      obj->flags = bo->flags | extra_flags;
+      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
       obj->rsvd1 = 0;
       obj->rsvd2 = 0;
    }
 
-   if (relocs != NULL && obj->relocation_count == 0) {
-      /* This is the first time we've ever seen a list of relocations for
-       * this BO.  Go ahead and set the relocations and then walk the list
-       * of relocations and add them all.
-       */
-      obj->relocation_count = relocs->num_relocs;
-      obj->relocs_ptr = (uintptr_t) relocs->relocs;
+   if (relocs != NULL) {
+      assert(obj->relocation_count == 0);
 
-      for (size_t i = 0; i < relocs->num_relocs; i++) {
-         VkResult result;
+      if (relocs->num_relocs > 0) {
+         /* This is the first time we've ever seen a list of relocations for
+          * this BO.  Go ahead and set the relocations and then walk the list
+          * of relocations and add them all.
+          */
+         exec->has_relocs = true;
+         obj->relocation_count = relocs->num_relocs;
+         obj->relocs_ptr = (uintptr_t) relocs->relocs;
 
-         /* A quick sanity check on relocations */
-         assert(relocs->relocs[i].offset < bo->size);
-         result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
-                                     extra_flags, alloc);
+         for (size_t i = 0; i < relocs->num_relocs; i++) {
+            VkResult result;
 
-         if (result != VK_SUCCESS)
-            return result;
+            /* A quick sanity check on relocations */
+            assert(relocs->relocs[i].offset < bo->size);
+            result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
+                                        extra_flags, alloc);
+
+            if (result != VK_SUCCESS)
+               return result;
+         }
       }
 
-      const uint32_t entries = relocs->deps->entries;
-      struct anv_bo **bos =
-         vk_alloc(alloc, entries * sizeof(*bos),
-                  8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-      if (bos == NULL)
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc);
+   }
 
-      struct set_entry *entry;
-      struct anv_bo **bo = bos;
-      set_foreach(relocs->deps, entry) {
-         *bo++ = (void *)entry->key;
-      }
+   return VK_SUCCESS;
+}
 
-      qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
+/* Add BO dependencies to execbuf */
+static VkResult
+anv_execbuf_add_bo_set(struct anv_execbuf *exec,
+                       struct set *deps,
+                       uint32_t extra_flags,
+                       const VkAllocationCallbacks *alloc)
+{
+   if (!deps || deps->entries <= 0)
+      return VK_SUCCESS;
 
-      VkResult result = VK_SUCCESS;
-      for (bo = bos; bo < bos + entries; bo++) {
-         result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
-         if (result != VK_SUCCESS)
-            break;
-      }
+   const uint32_t entries = deps->entries;
+   struct anv_bo **bos =
+      vk_alloc(alloc, entries * sizeof(*bos),
+               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (bos == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   struct anv_bo **bo = bos;
+   set_foreach(deps, entry) {
+      *bo++ = (void *)entry->key;
+   }
 
-      vk_free(alloc, bos);
+   qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
 
+   VkResult result = VK_SUCCESS;
+   for (bo = bos; bo < bos + entries; bo++) {
+      result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
       if (result != VK_SUCCESS)
-         return result;
+         break;
    }
 
-   return VK_SUCCESS;
+   vk_free(alloc, bos);
+
+   return result;
 }
 
 static VkResult
@@ -1204,7 +1243,7 @@ adjust_relocations_to_state_pool(struct anv_state_pool *pool,
     * relocations that point to the pool bo with the correct offset.
     */
    for (size_t i = 0; i < relocs->num_relocs; i++) {
-      if (relocs->reloc_bos[i] == &pool->block_pool.bo) {
+      if (relocs->reloc_bos[i] == pool->block_pool.bo) {
          /* Adjust the delta value in the relocation to correctly
           * correspond to the new delta.  Initially, this value may have
           * been negative (if treated as unsigned), but we trust in
@@ -1285,6 +1324,9 @@ static bool
 relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_execbuf *exec)
 {
+   if (!exec->has_relocs)
+      return true;
+
    static int userspace_relocs = -1;
    if (userspace_relocs < 0)
       userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
@@ -1309,7 +1351,7 @@ relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
     * given time.  The only option is to always relocate them.
     */
    anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
-                        &cmd_buffer->device->surface_state_pool.block_pool.bo,
+                        cmd_buffer->device->surface_state_pool.block_pool.bo,
                         true /* always relocate surface states */);
 
    /* Since we own all of the batch buffers, we know what values are stored
@@ -1338,11 +1380,64 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
 
    adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
                                       cmd_buffer->last_ss_pool_center);
-   VkResult result = anv_execbuf_add_bo(execbuf, &ss_pool->block_pool.bo,
-                                        &cmd_buffer->surface_relocs, 0,
-                                        &cmd_buffer->device->alloc);
-   if (result != VK_SUCCESS)
-      return result;
+   VkResult result;
+   struct anv_bo *bo;
+   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
+      anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+      /* Add surface dependencies (BOs) to the execbuf */
+      anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
+                             &cmd_buffer->device->alloc);
+
+      /* Add the BOs for all memory objects */
+      list_for_each_entry(struct anv_device_memory, mem,
+                          &cmd_buffer->device->memory_objects, link) {
+         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      struct anv_block_pool *pool;
+      pool = &cmd_buffer->device->dynamic_state_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      pool = &cmd_buffer->device->instruction_state_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      pool = &cmd_buffer->device->binding_table_pool.block_pool;
+      anv_block_pool_foreach_bo(bo, pool) {
+         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   } else {
+      /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
+       * will get added automatically by processing relocations on the batch
+       * buffer.  We have to add the surface state BO manually because it has
+       * relocations of its own that we need to be sure are processed.
+       */
+      result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo,
+                                  &cmd_buffer->surface_relocs, 0,
+                                  &cmd_buffer->device->alloc);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    /* First, we walk over all of the bos we've seen and add them and their
     * relocations to the validate list.
@@ -1388,14 +1483,20 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
       first_batch_bo->bo.index = last_idx;
    }
 
+   /* If we are pinning our BOs, we shouldn't have to relocate anything */
+   if (cmd_buffer->device->instance->physicalDevice.use_softpin)
+      assert(!execbuf->has_relocs);
+
    /* Now we go through and fixup all of the relocation lists to point to
     * the correct indices in the object array.  We have to do this after we
     * reorder the list above as some of the indices may have changed.
     */
-   u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
-      anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
+   if (execbuf->has_relocs) {
+      u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
+         anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
 
-   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
+      anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
+   }
 
    if (!cmd_buffer->device->info.has_llc) {
       __builtin_ia32_mfence();
@@ -1493,6 +1594,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
                        VkFence _fence)
 {
    ANV_FROM_HANDLE(anv_fence, fence, _fence);
+   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
 
    struct anv_execbuf execbuf;
    anv_execbuf_init(&execbuf);
@@ -1507,6 +1609,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
          result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                      0, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1514,6 +1617,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
          break;
 
       case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
          if (in_fence == -1) {
             in_fence = impl->fd;
          } else {
@@ -1563,6 +1667,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
          result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                      EXEC_OBJECT_WRITE, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1570,6 +1675,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
          break;
 
       case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
          need_out_fence = true;
          break;
 
@@ -1604,6 +1710,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!pdevice->has_syncobj_wait);
          result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
                                      EXEC_OBJECT_WRITE, &device->alloc);
          if (result != VK_SUCCESS)
@@ -1623,10 +1730,20 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
       }
    }
 
-   if (cmd_buffer)
+   if (cmd_buffer) {
+      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
+         struct anv_batch_bo **bo = u_vector_head(&cmd_buffer->seen_bbos);
+
+         device->cmd_buffer_being_decoded = cmd_buffer;
+         gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
+                         (*bo)->bo.size, (*bo)->bo.offset, false);
+         device->cmd_buffer_being_decoded = NULL;
+      }
+
       result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
-   else
+   } else {
       result = setup_empty_execbuf(&execbuf, device);
+   }
 
    if (result != VK_SUCCESS)
       return result;
@@ -1667,6 +1784,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
    }
 
    if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      assert(!pdevice->has_syncobj_wait);
       /* BO fences can't be shared, so they can't be temporary. */
       assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
 
@@ -1684,6 +1802,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
    }
 
    if (result == VK_SUCCESS && need_out_fence) {
+      assert(!pdevice->has_syncobj_wait);
       int out_fence = execbuf.execbuf.rsvd2 >> 32;
       for (uint32_t i = 0; i < num_out_semaphores; i++) {
          ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);