From: Jason Ekstrand
Date: Thu, 30 Jul 2015 21:59:02 +0000 (-0700)
Subject: vk: Re-name command buffer implementation files
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=26ba0ad54d6bef6237abfabf5a3f572c325951d3;p=mesa.git

vk: Re-name command buffer implementation files

Previously, the command buffer implementation was split between
anv_cmd_buffer.c and anv_cmd_emit.c. However, this naming convention was
confusing because none of the Vulkan entrypoints for anv_cmd_buffer were
actually in anv_cmd_buffer.c. This changes it so that anv_cmd_buffer.c is
what you think it is and the internals are in anv_batch_chain.c.
---

diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am
index 6d1212c532c..c816f97034f 100644
--- a/src/vulkan/Makefile.am
+++ b/src/vulkan/Makefile.am
@@ -57,7 +57,7 @@ libvulkan_la_SOURCES = \
 	anv_allocator.c \
 	anv_aub.c \
 	anv_cmd_buffer.c \
-	anv_cmd_emit.c \
+	anv_batch_chain.c \
 	anv_compiler.cpp \
 	anv_device.c \
 	anv_entrypoints.c \
diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c
new file mode 100644
index 00000000000..2f09248acee
--- /dev/null
+++ b/src/vulkan/anv_batch_chain.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+/** \file anv_batch_chain.c
+ *
+ * This file contains functions related to anv_cmd_buffer as a data
+ * structure. This involves everything required to create and destroy
+ * the actual batch buffers as well as link them together and handle
+ * relocations and surface state. It specifically does *not* contain any
+ * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
+ */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + struct anv_device *device, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->reloc_bos == NULL) { + anv_device_free(device, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ + return anv_reloc_list_init_clone(list, device, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +{ + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) { + anv_device_free(device, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + anv_reloc_list_grow(list, device, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? 
*/ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, device, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->device, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->device, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, device); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_device *device, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result 
= anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +{ + anv_reloc_list_finish(&bbo->relocs, device); + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_device_free(device, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo; + result = anv_batch_bo_clone(device, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, device); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); +} + +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; +} + +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. + */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &bbo->bo, 0 }, + ); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + struct anv_bo *surface_bo = + anv_cmd_buffer_current_surface_bo(cmd_buffer); + struct anv_state state; + + state.offset = align_u32(cmd_buffer->surface_next, alignment); + if (state.offset + size > surface_bo->size) + return (struct anv_state) { 0 }; + + state.map = surface_bo->map + state.offset; + state.alloc_size = size; + cmd_buffer->surface_next = state.offset + size; + + assert(state.offset + size <= surface_bo->size); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t 
alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *new_bbo, *old_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + /* Finish off the old buffer */ + old_bbo->length = cmd_buffer->surface_next; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer->surface_next = 1; + + list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo, *surface_bbo; + struct anv_device *device = cmd_buffer->device; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + list_inithead(&cmd_buffer->surface_bos); + + result = anv_batch_bo_create(device, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.device = device; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + result = anv_batch_bo_create(device, &surface_bbo); + if (result != VK_SUCCESS) + goto fail_batch_bo; + + list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_surface_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + + /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ + cmd_buffer->surface_next = 1; + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_surface_bo: + anv_batch_bo_destroy(surface_bbo, device); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, device); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + /* Destroy all of the surface state buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + anv_device_free(device, cmd_buffer->execbuf2.objects); + anv_device_free(device, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + + cmd_buffer->surface_next = 1; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *surface_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } else { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. 
+ */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < + ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (cmd_buffer->opt_flags & + VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* For chaining mode, we need to increment the number of + * relocations. This is because, when we chain, we need to add + * an MI_BATCH_BUFFER_START command. Adding this command will + * also add a relocation. In order to handle theis we'll + * increment it here and decrement it right before adding the + * MI_BATCH_BUFFER_START command. + */ + anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->length = cmd_buffer->surface_next; +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &first_bbo->bo, 0 }, + ); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + + struct GEN8_MI_BATCH_BUFFER_START ret = { + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &this_bbo->bo, offset }, + }; + last_bbo->relocs.num_relocs++; + GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, + last_bbo->bo.map + last_bbo->length, + &ret); + + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary->device, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + 
GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + /* Mark the surface buffer from the secondary as seen */ + anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. + */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. 
+ */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx] = + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = + &first_batch_bo->bo; + first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. + */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 28a3af7a9b8..3b9e67fdd0f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -31,896 +31,1395 @@ /** \file anv_cmd_buffer.c * - * This file contains functions related to anv_cmd_buffer as a data - * structure. This involves everything required to create and destroy - * the actual batch buffers as well as link them together and handle - * relocations and surface state. It specifically does *not* contain any - * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. 
As far as this file + * is concerned, most of anv_cmd_buffer is magic. */ -/*-----------------------------------------------------------------------* - * Functions related to anv_reloc_list - *-----------------------------------------------------------------------*/ - -static VkResult -anv_reloc_list_init_clone(struct anv_reloc_list *list, - struct anv_device *device, - const struct anv_reloc_list *other_list) +static void +anv_cmd_state_init(struct anv_cmd_state *state) { - if (other_list) { - list->num_relocs = other_list->num_relocs; - list->array_length = other_list->array_length; - } else { - list->num_relocs = 0; - list->array_length = 256; - } + state->rs_state = NULL; + state->vp_state = NULL; + state->cb_state = NULL; + state->ds_state = NULL; + memset(&state->state_vf, 0, sizeof(state->state_vf)); + memset(&state->descriptors, 0, sizeof(state->descriptors)); + + state->dirty = 0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->pipeline = NULL; + state->vp_state = NULL; + state->rs_state = NULL; + state->ds_state = NULL; +} - list->relocs = - anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; - if (list->relocs == NULL) + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - list->reloc_bos = - anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + cmd_buffer->device = device; - if (list->reloc_bos == NULL) { - anv_device_free(device, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; - if (other_list) { - memcpy(list->relocs, other_list->relocs, - list->array_length * sizeof(*list->relocs)); - memcpy(list->reloc_bos, other_list->reloc_bos, - list->array_length * sizeof(*list->reloc_bos)); - } + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->level = pCreateInfo->level; + cmd_buffer->opt_flags = 0; + + anv_cmd_state_init(&cmd_buffer->state); + + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; } -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) { - return anv_reloc_list_init_clone(list, device, NULL); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, cmdBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; } void -anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. 
The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); } -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, - size_t num_additional_relocs) +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) { - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; + cmd_buffer->opt_flags = pBeginInfo->flags; - struct drm_i915_gem_relocation_entry *new_relocs = - anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->renderPass); - struct anv_bo **new_reloc_bos = - anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) { - anv_device_free(device, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* FIXME: We shouldn't be starting on the first subpass */ + anv_cmd_buffer_begin_subpass(cmd_buffer, + &cmd_buffer->state.pass->subpasses[0]); } - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); - - list->array_length = new_length; - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->state.current_pipeline = UINT32_MAX; return VK_SUCCESS; } -uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) { - struct drm_i915_gem_relocation_entry *entry; - int index; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; - anv_reloc_list_grow(list, device, 1); - /* TODO: Handle failure */ + anv_cmd_buffer_end_batch_buffer(cmd_buffer); - /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = 0; - entry->write_domain = 0; + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } - return target_bo->offset + delta; + return VK_SUCCESS; } -static void -anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, - struct anv_reloc_list *other, uint32_t offset) +void anv_CmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) { - anv_reloc_list_grow(list, device, other->num_relocs); - /* TODO: Handle failure */ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - list->num_relocs += other->num_relocs; + default: + assert(!"invalid bind point"); + break; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch - *-----------------------------------------------------------------------*/ +void anv_CmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) + cmd_buffer->state.vp_state = vp_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) { - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - void *p = batch->next; + cmd_buffer->state.rs_state = rs_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - return p; + cmd_buffer->state.cb_state = cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; } -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) +void anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState dynamicDepthStencilState) { - return anv_reloc_list_add(batch->relocs, batch->device, - location - batch->start, bo, delta); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->state.ds_state = ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; } -void 
-anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +void anv_CmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) { - uint32_t size, offset; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; - size = other->next - other->start; - assert(size % 4 == 0); + assert(firstSet + setCount < MAX_SETS); - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; - assert(batch->next + size <= batch->end); + cmd_buffer->state.descriptors[firstSet + i].set = set; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); - memcpy(batch->next, other->start, size); + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->device, - other->relocs, offset); + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - batch->next += size; + dynamic_slot += set_layout->num_dynamic_buffers; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static VkResult -anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +void anv_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) { - VkResult result; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - result = anv_reloc_list_init(&bbo->relocs, device); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - *bbo_out = bbo; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} - return VK_SUCCESS; +void anv_CmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ - return result; + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + } } static VkResult -anv_batch_bo_clone(struct anv_device *device, - const struct anv_batch_bo *other_bbo, - struct anv_batch_bo **bbo_out) +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) { - VkResult result; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t attachments, bias, size; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + attachments = subpass->color_count; + } else { + bias = 0; + attachments = 0; + } - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + if (attachments + surface_count == 0) + return VK_SUCCESS; - result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + size = (bias + surface_count) * sizeof(uint32_t); + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; - bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; - *bbo_out = bbo; + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we cana't really saparate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table. 
+ */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; - return VK_SUCCESS; + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - return result; -} + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; - bbo->relocs.num_relocs = 0; + memcpy(state.map, view->view.surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->view.bo, view->view.offset); + + bt_map[a] = state.offset; + } + + if (layout == NULL) + return VK_SUCCESS; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + struct anv_surface_view *view = + d->set->descriptors[surface_slots[b].index].view; + + if (!view) + continue; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + anv_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) +static VkResult +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state->map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) { - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; } static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - anv_reloc_list_finish(&bbo->relocs, device); - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - anv_device_free(device, bbo); -} + uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; -static VkResult -anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, - struct list_head *new_list) -{ VkResult result = VK_SUCCESS; - - list_inithead(new_list); - - struct anv_batch_bo *prev_bbo = NULL; - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo; - result = anv_batch_bo_clone(device, bbo, &new_bbo); + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; - list_addtail(&new_bbo->link, new_list); - - if (prev_bbo) { - /* As we 
clone this list of batch_bo's, they chain one to the - * other using MI_BATCH_BUFFER_START commands. We need to fix up - * those relocations as we go. Fortunately, this is pretty easy - * as it will always be the last relocation in the list. - */ - uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; - } - - prev_bbo = new_bbo; } if (result != VK_SUCCESS) { - list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, device); - } + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - return result; -} + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); -} + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); -} + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } -struct anv_bo * -anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; } -struct anv_reloc_list * -anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) { - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + + return state; } -static void -cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) { - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_batch_bo *current_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_state state; + uint32_t *p; - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. 
- */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &bbo->bo, 0 }, - ); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - anv_batch_bo_finish(current_bbo, batch); + return state; } static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { - struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo; + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); if (result != VK_SUCCESS) return result; - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ + }; - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); return VK_SUCCESS; } -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_bo *surface_bo = - anv_cmd_buffer_current_surface_bo(cmd_buffer); - struct anv_state state; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; - state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > surface_bo->size) - return (struct anv_state) { 0 }; + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - state.map = surface_bo->map + state.offset; - state.alloc_size = size; - cmd_buffer->surface_next = state.offset + size; + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } - assert(state.offset + size <= surface_bo->size); + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - return state; -} + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - size, alignment); + cmd_buffer->state.compute_dirty = 0; } -VkResult -anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *new_bbo, *old_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; - /* Finish off the old buffer */ - old_bbo->length = cmd_buffer->surface_next; + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; } - *seen_bbo = new_bbo; - cmd_buffer->surface_next = 1; + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + 
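
The anv_cmd_buffer_alloc_surface_state shown a little earlier in this file is a straight bump allocator over the current surface-state BO: align the running surface_next cursor, fail if the block would overflow the BO, otherwise advance the cursor. A minimal standalone sketch of that pattern follows; bump_state, bump_alloc and align_up_u32 are illustrative names, not driver API, and alignment is assumed to be a power of two.

#include <stdint.h>

struct bump_state {
   void    *map;
   uint32_t offset;
   uint32_t alloc_size;
};

static uint32_t
align_up_u32(uint32_t v, uint32_t a)
{
   /* 'a' must be a power of two. */
   return (v + a - 1) & ~(a - 1);
}

static struct bump_state
bump_alloc(void *base, uint32_t capacity, uint32_t *next,
           uint32_t size, uint32_t alignment)
{
   uint32_t offset = align_up_u32(*next, alignment);

   /* Out of space: hand back a zeroed state, which callers treat the same
    * way the driver treats state.map == NULL. */
   if (offset + size > capacity)
      return (struct bump_state) { 0 };

   *next = offset + size;
   return (struct bump_state) {
      .map = (char *)base + offset,
      .offset = offset,
      .alloc_size = size,
   };
}

When such an allocation fails, flush_descriptor_sets falls back to anv_cmd_buffer_new_surface_state_bo and re-emits STATE_BASE_ADDRESS, as shown earlier in this patch.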
+ p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } - list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); - return VK_SUCCESS; -} + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } -VkResult -anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo, *surface_bbo; - struct anv_device *device = cmd_buffer->device; - VkResult result; + if (cmd_buffer->state.descriptors_dirty) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); + } - list_inithead(&cmd_buffer->batch_bos); - list_inithead(&cmd_buffer->surface_bos); + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + } - result = anv_batch_bo_create(device, &batch_bo); - if (result != VK_SUCCESS) - return result; + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + 
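
The color-calc state computed just above comes from anv_cmd_buffer_merge_dynamic, which ORs two pre-packed DWORD arrays; anv_batch_emit_merge relies on the same trick for the SF/raster and depth-stencil packets. The idea is that each contributor packs only the fields it owns and leaves the rest zero, so a bitwise OR reconstitutes the full packet. A small self-contained sketch; the 2-dword size and the bit values are made up for illustration.

#include <stdint.h>

static void
merge_packed_dwords(uint32_t *dst, const uint32_t *a, const uint32_t *b,
                    uint32_t dwords)
{
   /* Each input packs only its own fields and leaves the rest zero. */
   for (uint32_t i = 0; i < dwords; i++)
      dst[i] = a[i] | b[i];
}

int main(void)
{
   uint32_t pipeline_half[2] = { 0x00f00000, 0x00000000 };
   uint32_t dynamic_half[2]  = { 0x00000003, 0x00000100 };
   uint32_t packed[2];

   merge_packed_dwords(packed, pipeline_half, dynamic_half, 2);
   return (packed[0] == 0x00f00003 && packed[1] == 0x00000100) ? 0 : 1;
}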
anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } - list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.state_vf, pipeline->state_vf); + } - cmd_buffer->batch.device = device; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} - anv_batch_bo_start(batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); +void anv_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - result = anv_batch_bo_create(device, &surface_bbo); - if (result != VK_SUCCESS) - goto fail_batch_bo; + anv_cmd_buffer_flush_state(cmd_buffer); - list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} - int success = anv_vector_init(&cmd_buffer->seen_bbos, - sizeof(struct anv_bo *), - 8 * sizeof(struct anv_bo *)); - if (!success) - goto fail_surface_bo; +void anv_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + anv_cmd_buffer_flush_state(cmd_buffer); - /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ - cmd_buffer->surface_next = 1; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} - return VK_SUCCESS; +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} - fail_surface_bo: - anv_batch_bo_destroy(surface_bbo, device); - fail_batch_bo: - anv_batch_bo_destroy(batch_bo, device); +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void anv_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} - return result; +void anv_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); } -void -anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) { - struct anv_device *device = cmd_buffer->device; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); 
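
The *Indirect entrypoints above never read the argument buffer on the CPU; they point the command streamer at it with one MI_LOAD_REGISTER_MEM per 32-bit field, relying on the Vulkan indirect command structs being consecutive DWORDs (vertexCount, instanceCount, firstVertex, firstInstance for the non-indexed case). A sketch of that mapping using the GEN7_3DPRIM_* register offsets defined above; the base value is hypothetical.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct reg_load {
   uint32_t reg;        /* command streamer register (GEN7_3DPRIM_*) */
   uint32_t arg_offset; /* byte offset into the indirect argument struct */
};

/* The four fields are consecutive 32-bit words, so each maps to exactly
 * one MI_LOAD_REGISTER_MEM in the order used by anv_CmdDrawIndirect. */
static const struct reg_load draw_indirect_loads[] = {
   { 0x2434 /* 3DPRIM_VERTEX_COUNT   */, 0 },
   { 0x2438 /* 3DPRIM_INSTANCE_COUNT */, 4 },
   { 0x2430 /* 3DPRIM_START_VERTEX   */, 8 },
   { 0x243C /* 3DPRIM_START_INSTANCE */, 12 },
};

int main(void)
{
   uint32_t base = 0x100; /* hypothetical offset of the arguments in the buffer */

   for (unsigned i = 0; i < 4; i++)
      printf("MI_LOAD_REGISTER_MEM reg 0x%04" PRIx32 " <- buffer + %" PRIu32 "\n",
             draw_indirect_loads[i].reg,
             base + draw_indirect_loads[i].arg_offset);

   /* The non-indexed path additionally writes 0 to 3DPRIM_BASE_VERTEX
    * (0x2440) with MI_LOAD_REGISTER_IMM, as in anv_CmdDrawIndirect above. */
   return 0;
}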
+ + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} - anv_vector_finish(&cmd_buffer->seen_bbos); +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 - /* Destroy all of the batch buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} - /* Destroy all of the surface state buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} - anv_device_free(device, cmd_buffer->execbuf2.objects); - anv_device_free(device, cmd_buffer->execbuf2.bos); +void anv_CmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); } -void -anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdWaitEvents( + VkCmdBuffer cmdBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) { - struct anv_device *device = cmd_buffer->device; + stub(); +} - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); - } - assert(!list_empty(&cmd_buffer->batch_bos)); +void anv_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; - anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), - &cmd_buffer->batch, - 
GEN8_MI_BATCH_BUFFER_START_length * 4); + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ } - assert(!list_empty(&cmd_buffer->batch_bos)); - anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } - cmd_buffer->surface_next = 1; - /* Reset the list of seen buffers */ - cmd_buffer->seen_bbos.head = 0; - cmd_buffer->seen_bbos.tail = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { + cmd.CommandStreamerStallEnable = true; + } - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_batch_bo(cmd_buffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); -} + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } -void -anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - struct anv_batch_bo *surface_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); - /* Round batch up to an even number of dwords. */ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. 
+ */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } else { - /* If this is a secondary command buffer, we need to determine the - * mode in which it will be executed with vkExecuteCommands. We - * determine this statically here so that this stays in sync with the - * actual ExecuteCommands implementation. - */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < - ANV_CMD_BUFFER_BATCH_SIZE / 2)) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. - */ - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (cmd_buffer->opt_flags & - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - - /* For chaining mode, we need to increment the number of - * relocations. This is because, when we chain, we need to add - * an MI_BATCH_BUFFER_START command. Adding this command will - * also add a relocation. In order to handle theis we'll - * increment it here and decrement it right before adding the - * MI_BATCH_BUFFER_START command. 
- */ - anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; - } else { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); } } - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } - surface_bbo->length = cmd_buffer->surface_next; + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } -static inline VkResult -anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, - struct list_head *list) +void anv_CmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values) { - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); - if (bbo_ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + stub(); +} - *bbo_ptr = bbo; +static void +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; } - return VK_SUCCESS; + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = view->depth_qpitch >> 2); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); } void -anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary) +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { - switch (secondary->exec_mode) { - case ANV_CMD_BUFFER_EXEC_MODE_EMIT: - anv_batch_emit_batch(&primary->batch, &secondary->batch); - break; - case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { - struct anv_batch_bo *first_bbo = - list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - - anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &first_bbo->bo, 0 }, - ); - - struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); - assert(primary->batch.start == this_bbo->bo.map); - uint32_t offset = primary->batch.next - primary->batch.start; - - struct GEN8_MI_BATCH_BUFFER_START ret = { - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &this_bbo->bo, offset }, - }; - last_bbo->relocs.num_relocs++; - GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, - last_bbo->bo.map + last_bbo->length, - &ret); - - anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); - break; - } - case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { - struct list_head copy_list; - VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, - secondary->device, - ©_list); - if (result != VK_SUCCESS) - return; /* FIXME */ + cmd_buffer->state.subpass = subpass; - anv_cmd_buffer_add_seen_bbos(primary, ©_list); + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - struct anv_batch_bo *first_bbo = - list_first_entry(©_list, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(©_list, struct anv_batch_bo, link); + anv_cmd_buffer_emit_depth_stencil(cmd_buffer); +} - cmd_buffer_chain_to_batch_bo(primary, first_bbo); +void anv_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - list_splicetail(©_list, &primary->batch_bos); + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; - 
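
This COPY_AND_CHAIN path depends on anv_batch_bo_list_clone, whose subtlety is called out in its comment earlier in the file: cloned batch buffers still chain to each other with MI_BATCH_BUFFER_START, so the last relocation of each cloned batch must be repointed at the clone of its successor rather than at the original. A simplified model of that fixup; fake_batch and chain_target are illustrative stand-ins, not driver types.

#include <stdlib.h>

struct fake_batch {
   struct fake_batch *chain_target; /* what the final chaining reloc points at */
   struct fake_batch *next;         /* list link */
};

/* Clone a chain; as each node is copied, repoint the previous clone's
 * chaining reloc at the new clone instead of the original node. */
static struct fake_batch *
clone_chain(const struct fake_batch *head)
{
   struct fake_batch *new_head = NULL, *prev_clone = NULL;

   for (const struct fake_batch *b = head; b != NULL; b = b->next) {
      struct fake_batch *clone = calloc(1, sizeof(*clone));
      if (clone == NULL)
         return NULL; /* a real implementation unwinds the clones made so far */

      if (prev_clone != NULL) {
         prev_clone->next = clone;
         prev_clone->chain_target = clone; /* the fixup described above */
      } else {
         new_head = clone;
      }
      prev_clone = clone;
   }
   /* The last clone has no successor; the caller chains it onward itself,
    * as anv_cmd_buffer_add_secondary does with anv_batch_bo_continue. */
   return new_head;
}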
anv_batch_bo_continue(last_bbo, &primary->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); + const VkRect2D *render_area = &pRenderPassBegin->renderArea; - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - default: - assert(!"Invalid execution mode"); - } + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); - /* Mark the surface buffer from the secondary as seen */ - anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); + anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } -static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct anv_reloc_list *relocs) +void anv_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) { - struct drm_i915_gem_exec_object2 *obj = NULL; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - if (bo->index < cmd_buffer->execbuf2.bo_count && - cmd_buffer->execbuf2.bos[bo->index] == bo) - obj = &cmd_buffer->execbuf2.objects[bo->index]; + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - if (obj == NULL) { - /* We've never seen this one before. Add it to the list and assign - * an id that we can use later. - */ - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) { - anv_device_free(cmd_buffer->device, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); - } +void anv_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; - } + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); +void anv_CmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; + assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - } + anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - if (relocs != NULL && obj->relocation_count == 0) { - /* This is the first time we've ever seen a list of relocations for - * this BO. Go ahead and set the relocations and then walk the list - * of relocations and add them all. - */ - obj->relocation_count = relocs->num_relocs; - obj->relocs_ptr = (uintptr_t) relocs->relocs; + for (uint32_t i = 0; i < cmdBuffersCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - for (size_t i = 0; i < relocs->num_relocs; i++) - anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); - } + assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); - return VK_SUCCESS; + anv_cmd_buffer_add_secondary(primary, secondary); + } } -static void -anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) { - struct anv_bo *bo; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. - */ + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->execbuf2.need_reloc = true; + list_inithead(&pool->cmd_buffers); - list->relocs[i].target_handle = bo->index; - } + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; } -void -anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +VkResult anv_DestroyCommandPool( + VkDevice _device, + VkCmdPool cmdPool) { - struct anv_batch *batch = &cmd_buffer->batch; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - cmd_buffer->execbuf2.bo_count = 0; - cmd_buffer->execbuf2.need_reloc = false; - - /* First, we walk over all of the bos we've seen and add them and their - * relocations to the validate list. 
- */ - struct anv_batch_bo **bbo; - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + anv_ResetCommandPool(_device, cmdPool, 0); - struct anv_batch_bo *first_batch_bo = - list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + anv_device_free(device, pool); - /* The kernel requires that the last entry in the validation list be the - * batch buffer to execute. We can simply swap the element - * corresponding to the first batch_bo in the chain with the last - * element in the list. - */ - if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { - uint32_t idx = first_batch_bo->bo.index; - - struct drm_i915_gem_exec_object2 tmp_obj = - cmd_buffer->execbuf2.objects[idx]; - assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + return VK_SUCCESS; +} - cmd_buffer->execbuf2.objects[idx] = - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; - cmd_buffer->execbuf2.bos[idx] = - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; - cmd_buffer->execbuf2.bos[idx]->index = idx; +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = - &first_batch_bo->bo; - first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); } - /* Now we go through and fixup all of the relocation lists to point to - * the correct indices in the object array. We have to do this after we - * reorder the list above as some of the indices may have changed. - */ - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); - - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, - .buffer_count = cmd_buffer->execbuf2.bo_count, - .batch_start_offset = 0, - .batch_len = batch->next - batch->start, - .cliprects_ptr = 0, - .num_cliprects = 0, - .DR1 = 0, - .DR4 = 0, - .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, - .rsvd1 = cmd_buffer->device->context_id, - .rsvd2 = 0, - }; - - if (!cmd_buffer->execbuf2.need_reloc) - cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; + return VK_SUCCESS; } diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c deleted file mode 100644 index 3b9e67fdd0f..00000000000 --- a/src/vulkan/anv_cmd_emit.c +++ /dev/null @@ -1,1425 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** \file anv_cmd_buffer.c - * - * This file contains all of the stuff for emitting commands into a command - * buffer. This includes implementations of most of the vkCmd* - * entrypoints. This file is concerned entirely with state emission and - * not with the command buffer data structure itself. As far as this file - * is concerned, most of anv_cmd_buffer is magic. - */ - -static void -anv_cmd_state_init(struct anv_cmd_state *state) -{ - state->rs_state = NULL; - state->vp_state = NULL; - state->cb_state = NULL; - state->ds_state = NULL; - memset(&state->state_vf, 0, sizeof(state->state_vf)); - memset(&state->descriptors, 0, sizeof(state->descriptors)); - - state->dirty = 0; - state->vb_dirty = 0; - state->descriptors_dirty = 0; - state->pipeline = NULL; - state->vp_state = NULL; - state->rs_state = NULL; - state->ds_state = NULL; -} - -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->device = device; - - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); - if (result != VK_SUCCESS) - goto fail; - - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - cmd_buffer->level = pCreateInfo->level; - cmd_buffer->opt_flags = 0; - - anv_cmd_state_init(&cmd_buffer->state); - - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail: anv_device_free(device, cmd_buffer); - - return result; -} - -VkResult anv_DestroyCommandBuffer( - VkDevice _device, - VkCmdBuffer _cmd_buffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); - - list_del(&cmd_buffer->pool_link); - - anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; -} - -VkResult anv_ResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - - anv_cmd_state_init(&cmd_buffer->state); - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = device->scratch_block_pool.size; 
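
The scratch_size recorded here is what lets anv_cmd_buffer_flush_state, earlier in the patch, decide whether a newly bound pipeline requires STATE_BASE_ADDRESS to be re-emitted: if the scratch pool has grown past what the last base-address packet covered, the general state base must be re-programmed. A tiny model of that record-and-compare pattern; the names are illustrative.

#include <stdbool.h>
#include <stdint.h>

struct base_addr_record {
   uint32_t scratch_size; /* scratch size covered by the last STATE_BASE_ADDRESS */
};

/* Called whenever base addresses are emitted. */
static void
record_base_addresses(struct base_addr_record *rec, uint32_t pool_scratch_size)
{
   rec->scratch_size = pool_scratch_size;
}

/* Called when binding a pipeline: re-emit only if its scratch demand has
 * outgrown what the last STATE_BASE_ADDRESS covered. */
static bool
base_addresses_stale(const struct base_addr_record *rec,
                     uint32_t pipeline_total_scratch)
{
   return rec->scratch_size < pipeline_total_scratch;
}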
- if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. 
- */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); -} - -VkResult anv_BeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - cmd_buffer->opt_flags = pBeginInfo->flags; - - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { - cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->framebuffer); - cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->renderPass); - - /* FIXME: We shouldn't be starting on the first subpass */ - anv_cmd_buffer_begin_subpass(cmd_buffer, - &cmd_buffer->state.pass->subpasses[0]); - } - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - cmd_buffer->state.current_pipeline = UINT32_MAX; - - return VK_SUCCESS; -} - -VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. - */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - - return VK_SUCCESS; -} - -void anv_CmdBindPipeline( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.pipeline = pipeline; - cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - default: - assert(!"invalid bind point"); - break; - } -} - -void anv_CmdBindDynamicViewportState( - VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - - cmd_buffer->state.vp_state = vp_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; -} - -void anv_CmdBindDynamicRasterState( - VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - - cmd_buffer->state.rs_state = rs_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; -} - -void anv_CmdBindDynamicColorBlendState( - VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - - cmd_buffer->state.cb_state = cb_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; -} - -void anv_CmdBindDynamicDepthStencilState( - VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState dynamicDepthStencilState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); - - cmd_buffer->state.ds_state = ds_state; - 
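
The vkCmdBind* entrypoints in this file never touch the batch directly: each one stashes a pointer and raises a dirty bit, and the corresponding packets are emitted by the flush that runs at the next draw or dispatch. A stripped-down model of that deferral; the enum values and struct fields are invented for illustration.

#include <stdint.h>

enum { DIRTY_RS = 1u << 0, DIRTY_DS = 1u << 1, DIRTY_CB = 1u << 2 };

struct tracked_state {
   const void *rs, *ds, *cb;
   uint32_t    dirty;
};

static void
bind_depth_stencil(struct tracked_state *s, const void *ds_state)
{
   s->ds = ds_state;
   s->dirty |= DIRTY_DS; /* consumed, and cleared, by the draw-time flush */
}

static uint32_t
flush_dirty(struct tracked_state *s)
{
   uint32_t emitted = s->dirty; /* a real flush emits one packet per set bit */
   s->dirty = 0;
   return emitted;
}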
cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; -} - -void anv_CmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t setCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - - assert(firstSet + setCount < MAX_SETS); - - uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < setCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = layout->set[firstSet + i].layout; - - cmd_buffer->state.descriptors[firstSet + i].set = set; - - assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, - pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - - dynamic_slot += set_layout->num_dynamic_buffers; - } -} - -void anv_CmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, - }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); -} - -void anv_CmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, - uint32_t startBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. 
*/ - - assert(startBinding + bindingCount < MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[startBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); - } -} - -static VkResult -cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_layout *layout; - uint32_t attachments, bias, size; - - if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->state.compute_pipeline->layout; - else - layout = cmd_buffer->state.pipeline->layout; - - if (stage == VK_SHADER_STAGE_FRAGMENT) { - bias = MAX_RTS; - attachments = subpass->color_count; - } else { - bias = 0; - attachments = 0; - } - - /* This is a little awkward: layout can be NULL but we still have to - * allocate and set a binding table for the PS stage for render - * targets. */ - uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - - if (attachments + surface_count == 0) - return VK_SUCCESS; - - size = (bias + surface_count) * sizeof(uint32_t); - *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - /* This is highly annoying. The Vulkan spec puts the depth-stencil - * attachments in with the color attachments. Unfortunately, thanks to - * other aspects of the API, we cana't really saparate them before this - * point. Therefore, we have to walk all of the attachments but only - * put the color attachments into the binding table. - */ - for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *attachment = - fb->attachments[subpass->color_attachments[a]]; - - assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *view = - (const struct anv_color_attachment_view *)attachment; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->view.surface_state.map, 64); - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, - state.offset + 8 * 4, - view->view.bo, view->view.offset); - - bt_map[a] = state.offset; - } - - if (layout == NULL) - return VK_SUCCESS; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *surface_slots = - set_layout->stage[stage].surface_start; - - uint32_t start = bias + layout->set[set].surface_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { - struct anv_surface_view *view = - d->set->descriptors[surface_slots[b].index].view; - - if (!view) - continue; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - uint32_t offset; - if (surface_slots[b].dynamic_slot >= 0) { - uint32_t dynamic_offset = - d->dynamic_offsets[surface_slots[b].dynamic_slot]; - - offset = view->offset + dynamic_offset; - 
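
At this point the descriptor's base offset has just been combined with the dynamic offset bound via vkCmdBindDescriptorSets, and the call that follows shrinks the visible range by the same amount. A small sketch of that windowing arithmetic; dyn_window is an illustrative type, and the caller is assumed to keep dynamic_offset within base_range.

#include <stdint.h>

struct dyn_window {
   uint32_t offset;
   uint32_t range;
};

static struct dyn_window
apply_dynamic_offset(uint32_t base_offset, uint32_t base_range,
                     uint32_t dynamic_offset)
{
   struct dyn_window w;

   /* The surface address moves forward by the dynamic offset... */
   w.offset = base_offset + dynamic_offset;
   /* ...and the visible range shrinks by the same amount, matching the
    * view->range - dynamic_offset computation that follows. */
   w.range = base_range - dynamic_offset;
   return w;
}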
-            anv_fill_buffer_surface_state(state.map, view->format, offset,
-                                          view->range - dynamic_offset);
-         } else {
-            offset = view->offset;
-            memcpy(state.map, view->surface_state.map, 64);
-         }
-
-         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
-         *(uint64_t *)(state.map + 8 * 4) =
-            anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
-                               cmd_buffer->device,
-                               state.offset + 8 * 4,
-                               view->bo, offset);
-
-         bt_map[start + b] = state.offset;
-      }
-   }
-
-   return VK_SUCCESS;
-}
-
-static VkResult
-cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
-                         unsigned stage, struct anv_state *state)
-{
-   struct anv_pipeline_layout *layout;
-   uint32_t sampler_count;
-
-   if (stage == VK_SHADER_STAGE_COMPUTE)
-      layout = cmd_buffer->state.compute_pipeline->layout;
-   else
-      layout = cmd_buffer->state.pipeline->layout;
-
-   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
-   if (sampler_count == 0)
-      return VK_SUCCESS;
-
-   uint32_t size = sampler_count * 16;
-   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);
-
-   if (state->map == NULL)
-      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-
-   for (uint32_t set = 0; set < layout->num_sets; set++) {
-      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
-      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
-      struct anv_descriptor_slot *sampler_slots =
-         set_layout->stage[stage].sampler_start;
-
-      uint32_t start = layout->set[set].sampler_start[stage];
-
-      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
-         struct anv_sampler *sampler =
-            d->set->descriptors[sampler_slots[b].index].sampler;
-
-         if (!sampler)
-            continue;
-
-         memcpy(state->map + (start + b) * 16,
-                sampler->state, sizeof(sampler->state));
-      }
-   }
-
-   return VK_SUCCESS;
-}
-
-static VkResult
-flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
-{
-   struct anv_state surfaces = { 0, }, samplers = { 0, };
-   VkResult result;
-
-   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
-   if (result != VK_SUCCESS)
-      return result;
-   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
-   if (result != VK_SUCCESS)
-      return result;
-
-   static const uint32_t sampler_state_opcodes[] = {
-      [VK_SHADER_STAGE_VERTEX]          = 43,
-      [VK_SHADER_STAGE_TESS_CONTROL]    = 44, /* HS */
-      [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
-      [VK_SHADER_STAGE_GEOMETRY]        = 46,
-      [VK_SHADER_STAGE_FRAGMENT]        = 47,
-      [VK_SHADER_STAGE_COMPUTE]         = 0,
-   };
-
-   static const uint32_t binding_table_opcodes[] = {
-      [VK_SHADER_STAGE_VERTEX]          = 38,
-      [VK_SHADER_STAGE_TESS_CONTROL]    = 39,
-      [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
-      [VK_SHADER_STAGE_GEOMETRY]        = 41,
-      [VK_SHADER_STAGE_FRAGMENT]        = 42,
-      [VK_SHADER_STAGE_COMPUTE]         = 0,
-   };
-
-   if (samplers.alloc_size > 0) {
-      anv_batch_emit(&cmd_buffer->batch,
-                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
-                     ._3DCommandSubOpcode = sampler_state_opcodes[stage],
-                     .PointertoVSSamplerState = samplers.offset);
-   }
-
-   if (surfaces.alloc_size > 0) {
-      anv_batch_emit(&cmd_buffer->batch,
-                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
-                     ._3DCommandSubOpcode = binding_table_opcodes[stage],
-                     .PointertoVSBindingTable = surfaces.offset);
-   }
-
-   return VK_SUCCESS;
-}
-
-static void
-flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
-{
-   uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
-                       cmd_buffer->state.pipeline->active_stages;
-
-   VkResult result = VK_SUCCESS;
-   for_each_bit(s, dirty) {
-      result = flush_descriptor_set(cmd_buffer, s);
-      if (result != VK_SUCCESS)
-         break;
-   }
-
-   if (result != VK_SUCCESS) {
-      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
-      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
-      assert(result == VK_SUCCESS);
-
-      /* Re-emit state base addresses so we get the new surface state base
-       * address before we start emitting binding tables etc.
-       */
-      anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
-      /* Re-emit all active binding tables */
-      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
-         result = flush_descriptor_set(cmd_buffer, s);
-
-         /* It had better succeed this time */
-         assert(result == VK_SUCCESS);
-      }
-   }
-
-   cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
-}
-
-static struct anv_state
-anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
-                            uint32_t *a, uint32_t dwords, uint32_t alignment)
-{
-   struct anv_state state;
-
-   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                              dwords * 4, alignment);
-   memcpy(state.map, a, dwords * 4);
-
-   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
-
-   return state;
-}
-
-static struct anv_state
-anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
-                             uint32_t *a, uint32_t *b,
-                             uint32_t dwords, uint32_t alignment)
-{
-   struct anv_state state;
-   uint32_t *p;
-
-   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                              dwords * 4, alignment);
-   p = state.map;
-   for (uint32_t i = 0; i < dwords; i++)
-      p[i] = a[i] | b[i];
-
-   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
-
-   return state;
-}
-
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_device *device = cmd_buffer->device;
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct anv_state surfaces = { 0, }, samplers = { 0, };
-   VkResult result;
-
-   result = cmd_buffer_emit_samplers(cmd_buffer,
-                                     VK_SHADER_STAGE_COMPUTE, &samplers);
-   if (result != VK_SUCCESS)
-      return result;
-   result = cmd_buffer_emit_binding_table(cmd_buffer,
-                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
-   if (result != VK_SUCCESS)
-      return result;
-
-   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
-      .KernelStartPointer = pipeline->cs_simd,
-      .KernelStartPointerHigh = 0,
-      .BindingTablePointer = surfaces.offset,
-      .BindingTableEntryCount = 0,
-      .SamplerStatePointer = samplers.offset,
-      .SamplerCount = 0,
-      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
-   };
-
-   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
-   struct anv_state state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
-   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
-                  .InterfaceDescriptorTotalLength = size,
-                  .InterfaceDescriptorDataStartAddress = state.offset);
-
-   return VK_SUCCESS;
-}
-
-static void
-anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   VkResult result;
-
-   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
-
-   if (cmd_buffer->state.current_pipeline != GPGPU) {
-      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
-                     .PipelineSelection = GPGPU);
-      cmd_buffer->state.current_pipeline = GPGPU;
-   }
-
-   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
-
-   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
-       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
-      result = flush_compute_descriptor_set(cmd_buffer);
-      assert(result == VK_SUCCESS);
-      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
-   }
-
-   cmd_buffer->state.compute_dirty = 0;
-}
-
-static void
-anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
-   uint32_t *p;
-
-   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
-
-   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
-
-   if (cmd_buffer->state.current_pipeline != _3D) {
-      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
-                     .PipelineSelection = _3D);
-      cmd_buffer->state.current_pipeline = _3D;
-   }
-
-   if (vb_emit) {
-      const uint32_t num_buffers = __builtin_popcount(vb_emit);
-      const uint32_t num_dwords = 1 + num_buffers * 4;
-
-      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
-                          GEN8_3DSTATE_VERTEX_BUFFERS);
-      uint32_t vb, i = 0;
-      for_each_bit(vb, vb_emit) {
-         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
-         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
-
-         struct GEN8_VERTEX_BUFFER_STATE state = {
-            .VertexBufferIndex = vb,
-            .MemoryObjectControlState = GEN8_MOCS,
-            .AddressModifyEnable = true,
-            .BufferPitch = pipeline->binding_stride[vb],
-            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
-            .BufferSize = buffer->size - offset
-         };
-
-         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
-         i++;
-      }
-   }
-
-   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
-      /* If somebody compiled a pipeline after starting a command buffer the
-       * scratch bo may have grown since we started this cmd buffer (and
-       * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
-       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo.
-       */
-      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
-         anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
-   }
-
-   if (cmd_buffer->state.descriptors_dirty)
-      flush_descriptor_sets(cmd_buffer);
-
-   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
-      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
-                     .ScissorRectPointer = vp_state->scissor.offset);
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
-                     .CCViewportPointer = vp_state->cc_vp.offset);
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
-                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
-   }
-
-   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
-                                  ANV_CMD_BUFFER_RS_DIRTY)) {
-      anv_batch_emit_merge(&cmd_buffer->batch,
-                           cmd_buffer->state.rs_state->state_sf,
-                           pipeline->state_sf);
-      anv_batch_emit_merge(&cmd_buffer->batch,
-                           cmd_buffer->state.rs_state->state_raster,
-                           pipeline->state_raster);
-   }
-
-   if (cmd_buffer->state.ds_state &&
-       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
-                                   ANV_CMD_BUFFER_DS_DIRTY))) {
-      anv_batch_emit_merge(&cmd_buffer->batch,
-                           cmd_buffer->state.ds_state->state_wm_depth_stencil,
-                           pipeline->state_wm_depth_stencil);
-   }
-
-   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
-                                  ANV_CMD_BUFFER_DS_DIRTY)) {
-      struct anv_state state;
-      if (cmd_buffer->state.ds_state == NULL)
-         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
-                                             cmd_buffer->state.cb_state->state_color_calc,
-                                             GEN8_COLOR_CALC_STATE_length, 64);
-      else if (cmd_buffer->state.cb_state == NULL)
-         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
-                                             cmd_buffer->state.ds_state->state_color_calc,
-                                             GEN8_COLOR_CALC_STATE_length, 64);
-      else
-         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
-                                              cmd_buffer->state.ds_state->state_color_calc,
-                                              cmd_buffer->state.cb_state->state_color_calc,
-                                              GEN8_COLOR_CALC_STATE_length, 64);
-
-      anv_batch_emit(&cmd_buffer->batch,
-                     GEN8_3DSTATE_CC_STATE_POINTERS,
-                     .ColorCalcStatePointer = state.offset,
-                     .ColorCalcStatePointerValid = true);
-   }
-
-   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
-                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
-      anv_batch_emit_merge(&cmd_buffer->batch,
-                           cmd_buffer->state.state_vf, pipeline->state_vf);
-   }
-
-   cmd_buffer->state.vb_dirty &= ~vb_emit;
-   cmd_buffer->state.dirty = 0;
-}
-
-void anv_CmdDraw(
-    VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    firstVertex,
-    uint32_t                                    vertexCount,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
-   anv_cmd_buffer_flush_state(cmd_buffer);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
-                  .VertexAccessType = SEQUENTIAL,
-                  .VertexCountPerInstance = vertexCount,
-                  .StartVertexLocation = firstVertex,
-                  .InstanceCount = instanceCount,
-                  .StartInstanceLocation = firstInstance,
-                  .BaseVertexLocation = 0);
-}
-
-void anv_CmdDrawIndexed(
-    VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    firstIndex,
-    uint32_t                                    indexCount,
-    int32_t                                     vertexOffset,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
-   anv_cmd_buffer_flush_state(cmd_buffer);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
-                  .VertexAccessType = RANDOM,
-                  .VertexCountPerInstance = indexCount,
-                  .StartVertexLocation = firstIndex,
-                  .InstanceCount = instanceCount,
-                  .StartInstanceLocation = firstInstance,
-                  .BaseVertexLocation = vertexOffset);
-}
-
-static void
-anv_batch_lrm(struct anv_batch *batch,
-              uint32_t reg, struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
-                  .RegisterAddress = reg,
-                  .MemoryAddress = { bo, offset });
-}
-
-static void
-anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
-{
-   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
-                  .RegisterOffset = reg,
-                  .DataDWord = imm);
-}
-
-/* Auto-Draw / Indirect Registers */
-#define GEN7_3DPRIM_END_OFFSET          0x2420
-#define GEN7_3DPRIM_START_VERTEX        0x2430
-#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
-#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
-#define GEN7_3DPRIM_START_INSTANCE      0x243C
-#define GEN7_3DPRIM_BASE_VERTEX         0x2440
-
-void anv_CmdDrawIndirect(
-    VkCmdBuffer                                 cmdBuffer,
-    VkBuffer                                    _buffer,
-    VkDeviceSize                                offset,
-    uint32_t                                    count,
-    uint32_t                                    stride)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_bo *bo = buffer->bo;
-   uint32_t bo_offset = buffer->offset + offset;
-
-   anv_cmd_buffer_flush_state(cmd_buffer);
-
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
-   anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
-                  .IndirectParameterEnable = true,
-                  .VertexAccessType = SEQUENTIAL);
-}
-
-void anv_CmdDrawIndexedIndirect(
-    VkCmdBuffer                                 cmdBuffer,
-    VkBuffer                                    _buffer,
-    VkDeviceSize                                offset,
-    uint32_t                                    count,
-    uint32_t                                    stride)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_bo *bo = buffer->bo;
-   uint32_t bo_offset = buffer->offset + offset;
-
-   anv_cmd_buffer_flush_state(cmd_buffer);
-
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
-   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
-                  .IndirectParameterEnable = true,
-                  .VertexAccessType = RANDOM);
-}
-
-void anv_CmdDispatch(
-    VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    x,
-    uint32_t                                    y,
-    uint32_t                                    z)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
-
-   anv_cmd_buffer_flush_compute_state(cmd_buffer);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
-                  .SIMDSize = prog_data->simd_size / 16,
-                  .ThreadDepthCounterMaximum = 0,
-                  .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
-                  .ThreadGroupIDXDimension = x,
-                  .ThreadGroupIDYDimension = y,
-                  .ThreadGroupIDZDimension = z,
-                  .RightExecutionMask = pipeline->cs_right_mask,
-                  .BottomExecutionMask = 0xffffffff);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
-}
-
-#define GPGPU_DISPATCHDIMX 0x2500
-#define GPGPU_DISPATCHDIMY 0x2504
-#define GPGPU_DISPATCHDIMZ 0x2508
-
-void anv_CmdDispatchIndirect(
-    VkCmdBuffer                                 cmdBuffer,
-    VkBuffer                                    _buffer,
-    VkDeviceSize                                offset)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
-   struct anv_bo *bo = buffer->bo;
-   uint32_t bo_offset = buffer->offset + offset;
-
-   anv_cmd_buffer_flush_compute_state(cmd_buffer);
-
-   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
-   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
-   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
-                  .IndirectParameterEnable = true,
-                  .SIMDSize = prog_data->simd_size / 16,
-                  .ThreadDepthCounterMaximum = 0,
-                  .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
-                  .RightExecutionMask = pipeline->cs_right_mask,
-                  .BottomExecutionMask = 0xffffffff);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
-}
-
-void anv_CmdSetEvent(
-    VkCmdBuffer                                 cmdBuffer,
-    VkEvent                                     event,
-    VkPipelineStageFlags                        stageMask)
-{
-   stub();
-}
-
-void anv_CmdResetEvent(
-    VkCmdBuffer                                 cmdBuffer,
-    VkEvent                                     event,
-    VkPipelineStageFlags                        stageMask)
-{
-   stub();
-}
-
-void anv_CmdWaitEvents(
-    VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    eventCount,
-    const VkEvent*                              pEvents,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    uint32_t                                    memBarrierCount,
-    const void* const*                          ppMemBarriers)
-{
-   stub();
-}
-
-void anv_CmdPipelineBarrier(
-    VkCmdBuffer                                 cmdBuffer,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    VkBool32                                    byRegion,
-    uint32_t                                    memBarrierCount,
-    const void* const*                          ppMemBarriers)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   uint32_t b, *dw;
-
-   struct GEN8_PIPE_CONTROL cmd = {
-      GEN8_PIPE_CONTROL_header,
-      .PostSyncOperation = NoWrite,
-   };
-
-   /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
-
-   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
-      /* This is just what PIPE_CONTROL does */
-   }
-
-   if (anv_clear_mask(&srcStageMask,
-                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
-                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
-                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
-                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
-                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
-      cmd.StallAtPixelScoreboard = true;
-   }
-
-
-   if (anv_clear_mask(&srcStageMask,
-                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TRANSFER_BIT |
-                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
-      cmd.CommandStreamerStallEnable = true;
-   }
-
-   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
-      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
-   }
-
-   /* On our hardware, all stages will wait for execution as needed. */
-   (void)destStageMask;
-
-   /* We checked all known VkPipeEventFlags. */
-   anv_assert(srcStageMask == 0);
-
-   /* XXX: Right now, we're really dumb and just flush whatever categories
-    * the app asks for. One of these days we may make this a bit better
-    * but right now that's all the hardware allows for in most areas.
-    */
-   VkMemoryOutputFlags out_flags = 0;
-   VkMemoryInputFlags in_flags = 0;
-
-   for (uint32_t i = 0; i < memBarrierCount; i++) {
-      const struct anv_common *common = ppMemBarriers[i];
-      switch (common->sType) {
-      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
-         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
-         out_flags |= barrier->outputMask;
-         in_flags |= barrier->inputMask;
-         break;
-      }
-      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
-         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
-         out_flags |= barrier->outputMask;
-         in_flags |= barrier->inputMask;
-         break;
-      }
-      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
-         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
-         out_flags |= barrier->outputMask;
-         in_flags |= barrier->inputMask;
-         break;
-      }
-      default:
-         unreachable("Invalid memory barrier type");
-      }
-   }
-
-   for_each_bit(b, out_flags) {
-      switch ((VkMemoryOutputFlags)(1 << b)) {
-      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
-         break; /* FIXME: Little-core systems */
-      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
-         cmd.DCFlushEnable = true;
-         break;
-      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
-         cmd.RenderTargetCacheFlushEnable = true;
-         break;
-      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
-         cmd.DepthCacheFlushEnable = true;
-         break;
-      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
-         cmd.RenderTargetCacheFlushEnable = true;
-         cmd.DepthCacheFlushEnable = true;
-         break;
-      default:
-         unreachable("Invalid memory output flag");
-      }
-   }
-
-   for_each_bit(b, in_flags) {
-      switch ((VkMemoryInputFlags)(1 << b)) {
-      case VK_MEMORY_INPUT_HOST_READ_BIT:
-         break; /* FIXME: Little-core systems */
-      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
-      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
-      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
-         cmd.VFCacheInvalidationEnable = true;
-         break;
-      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
-         cmd.ConstantCacheInvalidationEnable = true;
-         /* fallthrough */
-      case VK_MEMORY_INPUT_SHADER_READ_BIT:
-         cmd.DCFlushEnable = true;
-         cmd.TextureCacheInvalidationEnable = true;
-         break;
-      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
-      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
-         break; /* XXX: Hunh? */
-      case VK_MEMORY_INPUT_TRANSFER_BIT:
-         cmd.TextureCacheInvalidationEnable = true;
-         break;
-      }
-   }
-
-   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
-   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
-}
-
-void anv_CmdPushConstants(
-    VkCmdBuffer                                 cmdBuffer,
-    VkPipelineLayout                            layout,
-    VkShaderStageFlags                          stageFlags,
-    uint32_t                                    start,
-    uint32_t                                    length,
-    const void*                                 values)
-{
-   stub();
-}
-
-static void
-anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
-   const struct anv_depth_stencil_view *view;
-
-   static const struct anv_depth_stencil_view null_view =
-      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
-
-   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
-      const struct anv_attachment_view *aview =
-         fb->attachments[subpass->depth_stencil_attachment];
-      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
-      view = (const struct anv_depth_stencil_view *)aview;
-   } else {
-      view = &null_view;
-   }
-
-   /* FIXME: Implement the PMA stall W/A */
-   /* FIXME: Width and Height are wrong */
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
-                  .SurfaceType = SURFTYPE_2D,
-                  .DepthWriteEnable = view->depth_stride > 0,
-                  .StencilWriteEnable = view->stencil_stride > 0,
-                  .HierarchicalDepthBufferEnable = false,
-                  .SurfaceFormat = view->depth_format,
-                  .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
-                  .SurfaceBaseAddress = { view->bo, view->depth_offset },
-                  .Height = cmd_buffer->state.framebuffer->height - 1,
-                  .Width = cmd_buffer->state.framebuffer->width - 1,
-                  .LOD = 0,
-                  .Depth = 1 - 1,
-                  .MinimumArrayElement = 0,
-                  .DepthBufferObjectControlState = GEN8_MOCS,
-                  .RenderTargetViewExtent = 1 - 1,
-                  .SurfaceQPitch = view->depth_qpitch >> 2);
-
-   /* Disable hierarchical depth buffers. */
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
-                  .StencilBufferEnable = view->stencil_stride > 0,
-                  .StencilBufferObjectControlState = GEN8_MOCS,
-                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
-                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
-                  .SurfaceQPitch = view->stencil_qpitch >> 2);
-
-   /* Clear the clear params. */
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
-}
-
-void
-anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
-                             struct anv_subpass *subpass)
-{
-   cmd_buffer->state.subpass = subpass;
-
-   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
-
-   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
-}
-
-void anv_CmdBeginRenderPass(
-    VkCmdBuffer                                 cmdBuffer,
-    const VkRenderPassBeginInfo*                pRenderPassBegin,
-    VkRenderPassContents                        contents)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
-   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
-
-   cmd_buffer->state.framebuffer = framebuffer;
-   cmd_buffer->state.pass = pass;
-
-   const VkRect2D *render_area = &pRenderPassBegin->renderArea;
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
-                  .ClippedDrawingRectangleYMin = render_area->offset.y,
-                  .ClippedDrawingRectangleXMin = render_area->offset.x,
-                  .ClippedDrawingRectangleYMax =
-                     render_area->offset.y + render_area->extent.height - 1,
-                  .ClippedDrawingRectangleXMax =
-                     render_area->offset.x + render_area->extent.width - 1,
-                  .DrawingRectangleOriginY = 0,
-                  .DrawingRectangleOriginX = 0);
-
-   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
-                                    pRenderPassBegin->pAttachmentClearValues);
-
-   anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
-}
-
-void anv_CmdNextSubpass(
-    VkCmdBuffer                                 cmdBuffer,
-    VkRenderPassContents                        contents)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
-   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
-
-   anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
-}
-
-void anv_CmdEndRenderPass(
-    VkCmdBuffer                                 cmdBuffer)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
-
-   /* Emit a flushing pipe control at the end of a pass. This is kind of a
-    * hack but it ensures that render targets always actually get written.
-    * Eventually, we should do flushing based on image format transitions
-    * or something of that nature.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
-                  .PostSyncOperation = NoWrite,
-                  .RenderTargetCacheFlushEnable = true,
-                  .InstructionCacheInvalidateEnable = true,
-                  .DepthCacheFlushEnable = true,
-                  .VFCacheInvalidationEnable = true,
-                  .TextureCacheInvalidationEnable = true,
-                  .CommandStreamerStallEnable = true);
-}
-
-void anv_CmdExecuteCommands(
-    VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    cmdBuffersCount,
-    const VkCmdBuffer*                          pCmdBuffers)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);
-
-   assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
-
-   anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);
-
-   for (uint32_t i = 0; i < cmdBuffersCount; i++) {
-      ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
-
-      assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);
-
-      anv_cmd_buffer_add_secondary(primary, secondary);
-   }
-}
-
-VkResult anv_CreateCommandPool(
-    VkDevice                                    _device,
-    const VkCmdPoolCreateInfo*                  pCreateInfo,
-    VkCmdPool*                                  pCmdPool)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   struct anv_cmd_pool *pool;
-
-   pool = anv_device_alloc(device, sizeof(*pool), 8,
-                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (pool == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   list_inithead(&pool->cmd_buffers);
-
-   *pCmdPool = anv_cmd_pool_to_handle(pool);
-
-   return VK_SUCCESS;
-}
-
-VkResult anv_DestroyCommandPool(
-    VkDevice                                    _device,
-    VkCmdPool                                   cmdPool)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
-
-   anv_ResetCommandPool(_device, cmdPool, 0);
-
-   anv_device_free(device, pool);
-
-   return VK_SUCCESS;
-}
-
-VkResult anv_ResetCommandPool(
-    VkDevice                                    device,
-    VkCmdPool                                   cmdPool,
-    VkCmdPoolResetFlags                         flags)
-{
-   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
-
-   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
-                            &pool->cmd_buffers, pool_link) {
-      anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
-   }
-
-   return VK_SUCCESS;
-}