+ return VK_SUCCESS;
+}
+
+static VkResult
+setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
+{
+ VkResult result = anv_execbuf_add_bo(device, execbuf,
+ device->trivial_batch_bo,
+ NULL, 0);
+ if (result != VK_SUCCESS)
+ return result;
+
+ execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
+ .buffers_ptr = (uintptr_t) execbuf->objects,
+ .buffer_count = execbuf->bo_count,
+ .batch_start_offset = 0,
+ .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
+ .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
+ .rsvd1 = device->context_id,
+ .rsvd2 = 0,
+ };
+
+ return VK_SUCCESS;
+}
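+
+/* For reference: the trivial batch submitted above holds only an
+ * MI_BATCH_BUFFER_END followed by an MI_NOOP, i.e. the 8 bytes covered by
+ * batch_len. A sketch of how it could be emitted at device init (assuming
+ * the usual anv_batch_emit helper):
+ *
+ *    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
+ *    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
+ */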
+
+/* We lock around execbuf for three main reasons:
+ *
+ * 1) When a block pool is resized, we create a new gem handle with a
+ * different size and, in the case of surface states, possibly a different
+ * center offset but we re-use the same anv_bo struct when we do so. If
+ * this happens in the middle of setting up an execbuf, we could end up
+ * with our list of BOs out of sync with our list of gem handles.
+ *
+ * 2) The algorithm we use for building the list of unique buffers isn't
+ * thread-safe. While the client is supposed to synchronize around
+ * QueueSubmit, this would be extremely difficult to debug if it ever came
+ * up in the wild due to a broken app. It's better to play it safe and
+ * just lock around QueueSubmit.
+ *
+ * 3) The anv_cmd_buffer_execbuf function may perform relocations in
+ * userspace. Due to the fact that the surface state buffer is shared
+ * between batches, we can't afford to have that happen from multiple
+ * threads at the same time. Even though the user is supposed to ensure
+ * this doesn't happen, we play it safe as in (2) above.
+ *
+ * Since the only other things that ever take the device lock, such as block
+ * pool resizes, happen only rarely, the lock is almost never contended, so
+ * taking it isn't really an expensive operation in this case.
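+ *
+ * Callers are therefore expected to hold the device mutex around this
+ * function, roughly like this (a sketch; that device->mutex is the lock in
+ * question is an assumption here):
+ *
+ *    pthread_mutex_lock(&device->mutex);
+ *    result = anv_queue_execbuf_locked(queue, submit);
+ *    pthread_mutex_unlock(&device->mutex);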
+ */
+VkResult
+anv_queue_execbuf_locked(struct anv_queue *queue,
+ struct anv_queue_submit *submit)
+{
+ struct anv_device *device = queue->device;
+ struct anv_execbuf execbuf;
+ anv_execbuf_init(&execbuf);
+ execbuf.alloc = submit->alloc;
+ execbuf.alloc_scope = submit->alloc_scope;
+ execbuf.perf_query_pass = submit->perf_query_pass;
+
+ /* Always add the workaround BO as it includes a driver identifier for the
+ * error_state.
+ */
+ VkResult result =
+ anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
+ if (result != VK_SUCCESS)
+ goto error;
+
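+ /* Add the BOs backing each fence. A fence this submission will signal
+ * gets EXEC_OBJECT_WRITE so that implicit synchronization makes anyone
+ * waiting on the BO wait for this batch.
+ */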
+ for (uint32_t i = 0; i < submit->fence_bo_count; i++) {
+ int signaled;
+ struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled);
+
+ result = anv_execbuf_add_bo(device, &execbuf, bo, NULL,
+ signaled ? EXEC_OBJECT_WRITE : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ }
+
+ if (submit->cmd_buffer) {
+ result = setup_execbuf_for_cmd_buffer(&execbuf, submit->cmd_buffer);
+ } else if (submit->simple_bo) {
+ result = anv_execbuf_add_bo(device, &execbuf, submit->simple_bo, NULL, 0);
+ if (result != VK_SUCCESS)
+ goto error;
+
+ execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
+ .buffers_ptr = (uintptr_t) execbuf.objects,
+ .buffer_count = execbuf.bo_count,
+ .batch_start_offset = 0,
+ .batch_len = submit->simple_bo_size,
+ .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
+ .rsvd1 = device->context_id,
+ .rsvd2 = 0,
+ };
+ } else {
+ result = setup_empty_execbuf(&execbuf, device);
+ }
+
+ if (result != VK_SUCCESS)
+ goto error;
+
+ const bool has_perf_query =
+ submit->perf_query_pass >= 0 &&
+ submit->cmd_buffer &&
+ submit->cmd_buffer->perf_query_pool;
+
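+ /* With INTEL_DEBUG=bat, decode and print the batch being submitted (and,
+ * for a perf query, its pass preamble) before handing it to the kernel.
+ */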
+ if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
+ if (submit->cmd_buffer) {
+ if (has_perf_query) {
+ struct anv_query_pool *query_pool = submit->cmd_buffer->perf_query_pool;
+ struct anv_bo *pass_batch_bo = query_pool->bo;
+ uint64_t pass_batch_offset =
+ khr_perf_query_preamble_offset(query_pool,
+ submit->perf_query_pass);
+
+ gen_print_batch(&device->decoder_ctx,
+ pass_batch_bo->map + pass_batch_offset, 64,
+ pass_batch_bo->offset + pass_batch_offset, false);
+ }
+
+ struct anv_batch_bo **bo = u_vector_tail(&submit->cmd_buffer->seen_bbos);
+ device->cmd_buffer_being_decoded = submit->cmd_buffer;
+ gen_print_batch(&device->decoder_ctx, (*bo)->bo->map,
+ (*bo)->bo->size, (*bo)->bo->offset, false);
+ device->cmd_buffer_being_decoded = NULL;
+ } else if (submit->simple_bo) {
+ gen_print_batch(&device->decoder_ctx, submit->simple_bo->map,
+ submit->simple_bo->size, submit->simple_bo->offset, false);
+ } else {
+ gen_print_batch(&device->decoder_ctx,
+ device->trivial_batch_bo->map,
+ device->trivial_batch_bo->size,
+ device->trivial_batch_bo->offset, false);
+ }
+ }
+
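+ /* With I915_EXEC_FENCE_ARRAY, the kernel repurposes the cliprects fields
+ * to carry an array of drm_i915_gem_exec_fence entries.
+ */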
+ if (submit->fence_count > 0) {
+ assert(device->physical->has_syncobj);
+ execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
+ execbuf.execbuf.num_cliprects = submit->fence_count;
+ execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+ }
+
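+ /* With I915_EXEC_FENCE_IN, the kernel reads the in-fence fd from the low
+ * 32 bits of rsvd2.
+ */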
+ if (submit->in_fence != -1) {
+ execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
+ execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence;
+ }
+
+ if (submit->need_out_fence)
+ execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
+
+ if (has_perf_query) {
+ struct anv_query_pool *query_pool = submit->cmd_buffer->perf_query_pool;
+ assert(submit->perf_query_pass < query_pool->n_passes);
+ struct gen_perf_query_info *query_info =
+ query_pool->pass_query[submit->perf_query_pass];
+
+ /* Some performance queries use only the pipeline statistics HW and don't
+ * need OA, in which case there is no need to reconfigure.
+ */
+ if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
+ (query_info->kind == GEN_PERF_QUERY_TYPE_OA ||
+ query_info->kind == GEN_PERF_QUERY_TYPE_RAW)) {
+ int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
+ (void *)(uintptr_t) query_info->oa_metrics_set_id);
+ if (ret < 0) {
+ result = anv_device_set_lost(device,
+ "i915-perf config failed: %s",
+ strerror(errno));
+ }
+ }
+
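+ /* Submit the preamble for this query pass, which lives in the query pool
+ * BO, as its own batch ahead of the main execbuf so the HW is programmed
+ * for the pass before the command buffer runs.
+ */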
+ struct anv_bo *pass_batch_bo = query_pool->bo;
+
+ struct drm_i915_gem_exec_object2 query_pass_object = {
+ .handle = pass_batch_bo->gem_handle,
+ .offset = pass_batch_bo->offset,
+ .flags = pass_batch_bo->flags,
+ };
+ struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
+ .buffers_ptr = (uintptr_t) &query_pass_object,
+ .buffer_count = 1,
+ .batch_start_offset = khr_perf_query_preamble_offset(query_pool,
+ submit->perf_query_pass),
+ .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
+ .rsvd1 = device->context_id,
+ };
+
+ int ret = device->no_hw ? 0 :
+ anv_gem_execbuffer(device, &query_pass_execbuf);
+ if (ret)
+ result = anv_queue_set_lost(queue, "execbuf2 failed: %m");
+ }
+
+ int ret = device->no_hw ? 0 :
+ anv_gem_execbuffer(device, &execbuf.execbuf);
+ if (ret)
+ result = anv_queue_set_lost(queue, "execbuf2 failed: %m");
+
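+ /* The kernel may have moved BOs that aren't soft-pinned; record the
+ * offsets it chose so that future submissions start from up-to-date
+ * addresses. Pinned BOs must not have moved.
+ */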
+ struct drm_i915_gem_exec_object2 *objects = execbuf.objects;
+ for (uint32_t k = 0; k < execbuf.bo_count; k++) {
+ if (execbuf.bos[k]->flags & EXEC_OBJECT_PINNED)
+ assert(execbuf.bos[k]->offset == objects[k].offset);
+ execbuf.bos[k]->offset = objects[k].offset;
+ }
+
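+ /* With I915_EXEC_FENCE_OUT, the kernel returns the out-fence fd in the
+ * high 32 bits of rsvd2.
+ */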
+ if (result == VK_SUCCESS && submit->need_out_fence)
+ submit->out_fence = execbuf.execbuf.rsvd2 >> 32;
+
+ error:
+ pthread_cond_broadcast(&device->queue_submit);