X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_cmd_buffer.c;h=c65eba287d660834f1c2341ff8fc30f070accd86;hb=c43b4bc85eddba8bc31665cfee5928bed8343516;hp=827c3ed4142dbf77c01c5a23bc3da63bf82e0a79;hpb=6bb6b5c341a5a75763d565565f164b63cff3388a;p=mesa.git diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 827c3ed4142..c65eba287d6 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "vk_format_info.h" + /** \file anv_cmd_buffer.c * * This file contains all of the stuff for emitting commands into a command @@ -115,113 +117,71 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *state = &cmd_buffer->state; + cmd_buffer->batch.status = VK_SUCCESS; + memset(&state->descriptors, 0, sizeof(state->descriptors)); - memset(&state->push_constants, 0, sizeof(state->push_constants)); + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + if (state->push_constants[i] != NULL) { + vk_free(&cmd_buffer->pool->alloc, state->push_constants[i]); + state->push_constants[i] = NULL; + } + } memset(state->binding_tables, 0, sizeof(state->binding_tables)); memset(state->samplers, 0, sizeof(state->samplers)); /* 0 isn't a valid config. This ensures that we always configure L3$. */ cmd_buffer->state.current_l3_config = 0; - state->dirty = ~0; + state->dirty = 0; state->vb_dirty = 0; + state->pending_pipe_bits = 0; state->descriptors_dirty = 0; state->push_constants_dirty = 0; state->pipeline = NULL; + state->framebuffer = NULL; + state->pass = NULL; + state->subpass = NULL; + state->push_constant_stages = 0; state->restart_index = UINT32_MAX; state->dynamic = default_dynamic_state; state->need_query_wa = true; + state->pma_fix_enabled = false; + state->hiz_enabled = false; if (state->attachments != NULL) { - anv_free(&cmd_buffer->pool->alloc, state->attachments); + vk_free(&cmd_buffer->pool->alloc, state->attachments); state->attachments = NULL; } state->gen7.index_buffer = NULL; } -/** - * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. 
- */ -void -anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); - - anv_free(&cmd_buffer->pool->alloc, state->attachments); - - if (pass->attachment_count == 0) { - state->attachments = NULL; - return; - } - - state->attachments = anv_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * - sizeof(state->attachments[0]), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ - abort(); - } - - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags clear_aspects = 0; - - if (anv_format_is_color(att->format)) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if (att->format->has_depth && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - } - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } - - state->attachments[i].pending_clear_aspects = clear_aspects; - if (clear_aspects) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } - } -} - -static VkResult +VkResult anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, uint32_t size) { struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; if (*ptr == NULL) { - *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) + if (*ptr == NULL) { + anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } } else if ((*ptr)->size < size) { - *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) + if (*ptr == NULL) { + anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } } (*ptr)->size = size; return VK_SUCCESS; } -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - static VkResult anv_create_cmd_buffer( struct anv_device * device, struct anv_cmd_pool * pool, @@ -231,11 +191,16 @@ static VkResult anv_create_cmd_buffer( struct anv_cmd_buffer *cmd_buffer; VkResult result; - cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + cmd_buffer->batch.status = VK_SUCCESS; + + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + cmd_buffer->state.push_constants[i] = NULL; + } cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; cmd_buffer->pool = pool; @@ -251,6 +216,9 @@ static VkResult anv_create_cmd_buffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, 
&device->dynamic_state_block_pool); + memset(&cmd_buffer->state.push_descriptor, 0, + sizeof(cmd_buffer->state.push_descriptor)); + if (pool) { list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); } else { @@ -265,7 +233,7 @@ static VkResult anv_create_cmd_buffer( return VK_SUCCESS; fail: - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer); return result; } @@ -288,9 +256,12 @@ VkResult anv_AllocateCommandBuffers( break; } - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers); + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) + pCommandBuffers[i] = VK_NULL_HANDLE; + } return result; } @@ -305,8 +276,8 @@ anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer); } void anv_FreeCommandBuffers( @@ -318,31 +289,46 @@ void anv_FreeCommandBuffers( for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + if (!cmd_buffer) + continue; + anv_cmd_buffer_destroy(cmd_buffer); } } -VkResult anv_ResetCommandBuffer( - VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) +VkResult +anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->usage_flags = 0; cmd_buffer->state.current_pipeline = UINT32_MAX; anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_reset(cmd_buffer); + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &cmd_buffer->device->surface_state_block_pool); + + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &cmd_buffer->device->dynamic_state_block_pool); return VK_SUCCESS; } +VkResult anv_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + return anv_cmd_buffer_reset(cmd_buffer); +} + void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { switch (cmd_buffer->device->info.gen) { case 7: if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); else return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: @@ -354,72 +340,6 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) } } -VkResult anv_BeginCommandBuffer( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - /* If this is the first vkBeginCommandBuffer, we must *initialize* the - * command buffer's state. Otherwise, we must *reset* its state. In both - * cases we reset it. 
- * - * From the Vulkan 1.0 spec: - * - * If a command buffer is in the executable state and the command buffer - * was allocated from a command pool with the - * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then - * vkBeginCommandBuffer implicitly resets the command buffer, behaving - * as if vkResetCommandBuffer had been called with - * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts - * the command buffer in the recording state. - */ - anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); - - cmd_buffer->usage_flags = pBeginInfo->flags; - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || - !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - if (cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); - cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); - - struct anv_subpass *subpass = - &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - - anv_cmd_buffer_set_subpass(cmd_buffer, subpass); - } - - return VK_SUCCESS; -} - -VkResult anv_EndCommandBuffer( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. 
- */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - - return VK_SUCCESS; -} - void anv_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -608,40 +528,26 @@ void anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; - if (cmd_buffer->state.descriptors[firstSet + i] != set) { - cmd_buffer->state.descriptors[firstSet + i] = set; - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - } + cmd_buffer->state.descriptors[firstSet + i] = set; if (set_layout->dynamic_offset_count > 0) { - anv_foreach_stage(s, set_layout->shader_stages) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); - - struct anv_push_constants *push = - cmd_buffer->state.push_constants[s]; - - unsigned d = layout->set[firstSet + i].dynamic_offset_start; - const uint32_t *offsets = pDynamicOffsets + dynamic_slot; - struct anv_descriptor *desc = set->descriptors; - - for (unsigned b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index < 0) - continue; - - unsigned array_size = set_layout->binding[b].array_size; - for (unsigned j = 0; j < array_size; j++) { - uint32_t range = 0; - if (desc->buffer_view) - range = desc->buffer_view->range; - push->dynamic[d].offset = *(offsets++); - push->dynamic[d].range = range; - desc++; - d++; - } - } - } - cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + uint32_t dynamic_offset_start = + layout->set[firstSet + i].dynamic_offset_start; + + /* Assert that everything is in range */ + assert(dynamic_offset_start + set_layout->dynamic_offset_count <= + ARRAY_SIZE(cmd_buffer->state.dynamic_offsets)); + assert(dynamic_slot + set_layout->dynamic_offset_count <= + dynamicOffsetCount); + + typed_memcpy(&cmd_buffer->state.dynamic_offsets[dynamic_offset_start], + &pDynamicOffsets[dynamic_slot], + set_layout->dynamic_offset_count); + + dynamic_slot += set_layout->dynamic_offset_count; } + + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; } } @@ -666,250 +572,23 @@ void anv_CmdBindVertexBuffers( } } -static void -add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, - struct anv_state state, struct anv_bo *bo, uint32_t offset) -{ - /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and - * 9 for gen8+. We only write the first dword for gen8+ here and rely on - * the initial state to set the high bits to 0. */ - - const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; - - anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, - state.offset + dword * 4, bo, offset); -} - -const struct anv_format * -anv_format_for_descriptor_type(VkDescriptorType type) +enum isl_format +anv_isl_format_for_descriptor_type(VkDescriptorType type) { switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); + return ISL_FORMAT_R32G32B32A32_FLOAT; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); + return ISL_FORMAT_RAW; default: unreachable("Invalid descriptor type"); } } -VkResult -anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, - struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_bind_map *map; - uint32_t color_count, bias, state_offset; - - switch (stage) { - case MESA_SHADER_FRAGMENT: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = MAX_RTS; - color_count = subpass->color_count; - break; - case MESA_SHADER_COMPUTE: - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - bias = 1; - color_count = 0; - break; - default: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = 0; - color_count = 0; - break; - } - - if (color_count + map->surface_count == 0) { - *bt_state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, - bias + map->surface_count, - &state_offset); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t a = 0; a < color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); - } - - if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { - struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; - uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; - - struct anv_state surface_state; - surface_state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer); - - const struct anv_format *format = - anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, - format->isl_format, bo_offset, 12, 1); - - bt_map[0] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); - } - - if (map->surface_count == 0) - goto out; - - if (map->image_count > 0) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); - if (result != VK_SUCCESS) - return result; - - cmd_buffer->state.push_constants_dirty |= 1 << stage; - } - - uint32_t image = 0; - for (uint32_t s = 0; s < map->surface_count; s++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - struct anv_state surface_state; - struct anv_bo *bo; - uint32_t bo_offset; - - switch (desc->type) { - case 
VK_DESCRIPTOR_TYPE_SAMPLER: - /* Nothing for us to do here */ - continue; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - surface_state = desc->image_view->sampler_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - surface_state = desc->image_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, - image_param); - image_param->surface_idx = bias + s; - break; - } - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - surface_state = desc->buffer_view->surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - surface_state = desc->buffer_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, - image_param); - image_param->surface_idx = bias + s; - break; - - default: - assert(!"Invalid descriptor type"); - continue; - } - - bt_map[bias + s] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); - } - assert(image == map->image_count); - - out: - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*bt_state); - - return VK_SUCCESS; -} - -VkResult -anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, struct anv_state *state) -{ - struct anv_pipeline_bind_map *map; - - if (stage == MESA_SHADER_COMPUTE) - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - else - map = &cmd_buffer->state.pipeline->bindings[stage]; - - if (map->sampler_count == 0) { - *state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - uint32_t size = map->sampler_count * 16; - *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); - - if (state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t s = 0; s < map->sampler_count; s++) { - struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && - desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - continue; - - struct anv_sampler *sampler = desc->sampler; - - /* This can happen if we have an unfilled slot since TYPE_SAMPLER - * happens to be zero. 
- */ - if (sampler == NULL) - continue; - - memcpy(state->map + (s * 16), - sampler->state, sizeof(sampler->state)); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*state); - - return VK_SUCCESS; -} - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment) @@ -919,8 +598,7 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); memcpy(state.map, data, size); - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); @@ -941,56 +619,28 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; } -/** - * @brief Setup the command buffer for recording commands inside the given - * subpass. - * - * This does not record all commands needed for starting the subpass. - * Starting the subpass may require additional commands. - * - * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer - * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the - * command buffer for recording commands for some subpass. But only the first - * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. - */ -void -anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - switch (cmd_buffer->device->info.gen) { - case 7: - gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 8: - gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 9: - gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - default: - unreachable("unsupported gen\n"); - } -} - struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { + /* If we don't have this stage, bail. */ + if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage)) + return (struct anv_state) { .offset = 0 }; + struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->prog_data[stage]; + const struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->shaders[stage]->prog_data; /* If we don't actually have any push constants, bail. 
*/ - if (data == NULL || prog_data->nr_params == 0) + if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) return (struct anv_state) { .offset = 0 }; struct anv_state state = @@ -1005,8 +655,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); } - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); return state; } @@ -1017,27 +666,17 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_push_constants *data = cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - const unsigned push_constant_data_size = - (local_id_dwords + prog_data->nr_params) * 4; - const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - const unsigned param_aligned_count = - reg_aligned_constant_size / sizeof(uint32_t); - /* If we don't actually have any push constants, bail. */ - if (reg_aligned_constant_size == 0) + if (cs_prog_data->push.total.size == 0) return (struct anv_state) { .offset = 0 }; - const unsigned threads = pipeline->cs_thread_width_max; - const unsigned total_push_constants_size = - reg_aligned_constant_size * threads; const unsigned push_constant_alignment = cmd_buffer->device->info.gen < 8 ? 32 : 64; const unsigned aligned_total_push_constants_size = - ALIGN(total_push_constants_size, push_constant_alignment); + ALIGN(cs_prog_data->push.total.size, push_constant_alignment); struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, aligned_total_push_constants_size, @@ -1046,25 +685,36 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) /* Walk through the param array and fill the buffer with data */ uint32_t *u32_map = state.map; - brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, - reg_aligned_constant_size); - - /* Setup uniform data for the first thread */ - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + if (cs_prog_data->push.cross_thread.size > 0) { + assert(cs_prog_data->thread_local_id_index < 0 || + cs_prog_data->thread_local_id_index >= + cs_prog_data->push.cross_thread.dwords); + for (unsigned i = 0; + i < cs_prog_data->push.cross_thread.dwords; + i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } } - /* Copy uniform data from the first thread to every other thread */ - const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); - for (unsigned t = 1; t < threads; t++) { - memcpy(&u32_map[t * param_aligned_count + local_id_dwords], - &u32_map[local_id_dwords], - uniform_data_size); + if (cs_prog_data->push.per_thread.size > 0) { + for (unsigned t = 0; t < cs_prog_data->threads; t++) { + unsigned dst = + 8 * (cs_prog_data->push.per_thread.regs * t + + cs_prog_data->push.cross_thread.regs); + unsigned src = cs_prog_data->push.cross_thread.dwords; + for ( ; src < prog_data->nr_params; src++, dst++) { + if (src != cs_prog_data->thread_local_id_index) { + uint32_t offset = 
(uintptr_t)prog_data->param[src]; + u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset); + } else { + u32_map[dst] = t * cs_prog_data->simd_size; + } + } + } } - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); return state; } @@ -1080,7 +730,11 @@ void anv_CmdPushConstants( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_foreach_stage(stage, stageFlags) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, + stage, client_data); + if (result != VK_SUCCESS) + return; memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, pValues, size); @@ -1089,24 +743,6 @@ void anv_CmdPushConstants( cmd_buffer->state.push_constants_dirty |= stageFlags; } -void anv_CmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - - assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - - assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - - anv_cmd_buffer_add_secondary(primary, secondary); - } -} - VkResult anv_CreateCommandPool( VkDevice _device, const VkCommandPoolCreateInfo* pCreateInfo, @@ -1116,7 +752,7 @@ VkResult anv_CreateCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; - pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1141,9 +777,15 @@ void anv_DestroyCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - anv_ResetCommandPool(_device, commandPool, 0); + if (!pool) + return; - anv_free2(&device->alloc, pAllocator, pool); + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + vk_free2(&device->alloc, pAllocator, pool); } VkResult anv_ResetCommandPool( @@ -1153,22 +795,22 @@ VkResult anv_ResetCommandPool( { ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - /* FIXME: vkResetCommandPool must not destroy its command buffers. The - * Vulkan 1.0 spec requires that it only reset them: - * - * Resetting a command pool recycles all of the resources from all of - * the command buffers allocated from the command pool back to the - * command pool. All command buffers that have been allocated from the - * command pool are put in the initial state. - */ - list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - anv_cmd_buffer_destroy(cmd_buffer); + list_for_each_entry(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_reset(cmd_buffer); } return VK_SUCCESS; } +void anv_TrimCommandPoolKHR( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlagsKHR flags) +{ + /* Nothing for us to do here. Our pools stay pretty tidy. */ +} + /** * Return NULL if the current subpass has no depthstencil attachment. 
*/ @@ -1178,14 +820,134 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED) return NULL; const struct anv_image_view *iview = - fb->attachments[subpass->depth_stencil_attachment]; + fb->attachments[subpass->depth_stencil_attachment.attachment]; assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); return iview; } + +void anv_CmdPushDescriptorSetKHR( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t _set, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + + assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS || + pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE); + assert(_set < MAX_SETS); + + const struct anv_descriptor_set_layout *set_layout = + layout->set[_set].layout; + struct anv_descriptor_set *set = &cmd_buffer->state.push_descriptor.set; + + set->layout = set_layout; + set->size = anv_descriptor_set_layout_size(set_layout); + set->buffer_count = set_layout->buffer_count; + set->buffer_views = cmd_buffer->state.push_descriptor.buffer_views; + + /* Go through the user supplied descriptors. */ + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + anv_descriptor_set_write_image_view(set, &cmd_buffer->device->info, + write->pImageInfo + j, + write->descriptorType, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + anv_descriptor_set_write_buffer_view(set, + write->descriptorType, + bview, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + assert(buffer); + + anv_descriptor_set_write_buffer(set, + cmd_buffer->device, + &cmd_buffer->surface_state_stream, + write->descriptorType, + buffer, + write->dstBinding, + write->dstArrayElement + j, + write->pBufferInfo[j].offset, + write->pBufferInfo[j].range); + } + break; + + default: + break; + } + } + + cmd_buffer->state.descriptors[_set] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; +} + +void anv_CmdPushDescriptorSetWithTemplateKHR( + VkCommandBuffer commandBuffer, + VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkPipelineLayout _layout, + uint32_t _set, + const void* pData) +{ + 
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_descriptor_update_template, template, + descriptorUpdateTemplate); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + + assert(_set < MAX_PUSH_DESCRIPTORS); + + const struct anv_descriptor_set_layout *set_layout = + layout->set[_set].layout; + struct anv_descriptor_set *set = &cmd_buffer->state.push_descriptor.set; + + set->layout = set_layout; + set->size = anv_descriptor_set_layout_size(set_layout); + set->buffer_count = set_layout->buffer_count; + set->buffer_views = cmd_buffer->state.push_descriptor.buffer_views; + + anv_descriptor_set_write_template(set, + cmd_buffer->device, + &cmd_buffer->surface_state_stream, + template, + pData); + + cmd_buffer->state.descriptors[_set] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; +}
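
Notes on two of the hunks above. The code below is illustrative sketch code, not part of the patch.

VK_KHR_push_descriptor (anv_CmdPushDescriptorSetKHR and
anv_CmdPushDescriptorSetWithTemplateKHR, added above) lets the driver write
descriptors straight into the per-command-buffer storage at
cmd_buffer->state.push_descriptor and mark the affected shader stages dirty,
with no descriptor-set allocation or vkUpdateDescriptorSets round trip.
Application-side usage looks roughly like this (my_ubo, my_uniforms, cmd and
pipeline_layout are hypothetical; error handling omitted):

   VkDescriptorBufferInfo buffer_info = {
      .buffer = my_ubo,
      .offset = 0,
      .range  = sizeof(struct my_uniforms),
   };
   VkWriteDescriptorSet write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstBinding = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      .pBufferInfo = &buffer_info,
   };
   /* The descriptor lands directly in the command buffer; the dstSet
    * member is ignored for push descriptors. */
   vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                             pipeline_layout, /*set*/ 0, 1, &write);

The reworked anv_cmd_buffer_cs_push_constants() hunk packs a shared
("cross-thread") block of uniform dwords exactly once, then replicates the
remaining ("per-thread") dwords once per hardware thread, patching the
thread-local-ID slot with t * simd_size as it goes. The standalone sketch
below mirrors that packing loop under illustrative sketch_* names, with
field meanings inferred from brw_cs_prog_data:

   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   struct sketch_push_layout {
      unsigned cross_thread_dwords; /* dwords shared by every thread */
      unsigned cross_thread_regs;   /* same block, in 8-dword registers */
      unsigned per_thread_regs;     /* per-thread block, in registers */
      unsigned threads;             /* number of HW threads to fill */
      int      local_id_index;      /* param index of the local ID, or -1 */
      unsigned simd_size;           /* 8, 16 or 32 */
   };

   /* Pack nr_params already-resolved uniform dwords into the
    * register-addressed buffer consumed by the CURBE load. */
   static void
   sketch_pack_cs_push(const struct sketch_push_layout *l,
                       const uint32_t *params, unsigned nr_params,
                       uint32_t *out)
   {
      /* The cross-thread block is written once, at offset 0.  As the
       * patch asserts, the thread-local ID must not live inside it. */
      assert(l->local_id_index < 0 ||
             (unsigned)l->local_id_index >= l->cross_thread_dwords);
      for (unsigned i = 0; i < l->cross_thread_dwords; i++)
         out[i] = params[i];

      /* Every HW thread then gets its own copy of the remaining params,
       * starting on a register boundary after the cross-thread block. */
      for (unsigned t = 0; t < l->threads; t++) {
         unsigned dst = 8 * (l->per_thread_regs * t + l->cross_thread_regs);
         for (unsigned src = l->cross_thread_dwords; src < nr_params;
              src++, dst++) {
            if ((int)src == l->local_id_index)
               out[dst] = t * l->simd_size; /* base invocation ID, thread t */
            else
               out[dst] = params[src];
         }
      }
   }

   int main(void)
   {
      /* Hypothetical shader: four cross-thread dwords plus the local ID
       * at param index 4, SIMD16, two HW threads. */
      const uint32_t params[5] = { 10, 11, 12, 13, 0 };
      const struct sketch_push_layout l = {
         .cross_thread_dwords = 4, .cross_thread_regs = 1,
         .per_thread_regs = 1, .threads = 2,
         .local_id_index = 4, .simd_size = 16,
      };
      uint32_t out[8 * 3] = { 0 };

      sketch_pack_cs_push(&l, params, 5, out);
      /* Thread 0 reads local-ID base 0 at out[8]; thread 1 reads 16 at
       * out[16], matching u32_map[dst] = t * cs_prog_data->simd_size
       * in the patch. */
      printf("%u %u\n", (unsigned)out[8], (unsigned)out[16]);
      return 0;
   }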