X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_cmd_buffer.c;h=1ca33f206aa82f41cbb877af7bf976f98d8f3362;hb=2b676b2ce87520042ffb1fae8a9c0d97ba0e3cbc;hp=d7e50db139756c4a006ec0065e357da1720fc556;hpb=f31ed6d0cd3c6b66304bb060a0c204ecf8621e6a;p=mesa.git diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index d7e50db1397..1ca33f206aa 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -30,6 +30,7 @@ #include "anv_private.h" #include "vk_format_info.h" +#include "vk_util.h" /** \file anv_cmd_buffer.c * @@ -71,101 +72,121 @@ const struct anv_dynamic_state default_dynamic_state = { .front = 0u, .back = 0u, }, + .line_stipple = { + .factor = 0u, + .pattern = 0u, + }, }; -void +/** + * Copy the dynamic state from src to dest based on the copy_mask. + * + * Avoid copying states that have not changed, except for VIEWPORT, SCISSOR and + * BLEND_CONSTANTS (always copy them if they are in the copy_mask). + * + * Returns a mask of the states which changed. + */ +anv_cmd_dirty_mask_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, const struct anv_dynamic_state *src, - uint32_t copy_mask) + anv_cmd_dirty_mask_t copy_mask) { - if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + anv_cmd_dirty_mask_t changed = 0; + + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { dest->viewport.count = src->viewport.count; typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); + changed |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; } - if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) { dest->scissor.count = src->scissor.count; typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); + changed |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; } - if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) - dest->line_width = src->line_width; + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + changed |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + } + +#define ANV_CMP_COPY(field, flag) \ + if (copy_mask & flag) { \ + if (dest->field != src->field) { \ + dest->field = src->field; \ + changed |= flag; \ + } \ + } - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) - dest->depth_bias = src->depth_bias; + ANV_CMP_COPY(line_width, ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH); - if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) - typed_memcpy(dest->blend_constants, src->blend_constants, 4); + ANV_CMP_COPY(depth_bias.bias, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + ANV_CMP_COPY(depth_bias.clamp, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + ANV_CMP_COPY(depth_bias.slope, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + + ANV_CMP_COPY(depth_bounds.min, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS); + ANV_CMP_COPY(depth_bounds.max, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS); - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) - dest->depth_bounds = src->depth_bounds; + ANV_CMP_COPY(stencil_compare_mask.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK); + ANV_CMP_COPY(stencil_compare_mask.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) - dest->stencil_compare_mask = src->stencil_compare_mask; + ANV_CMP_COPY(stencil_write_mask.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK); + ANV_CMP_COPY(stencil_write_mask.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) - dest->stencil_write_mask = 
src->stencil_write_mask; + ANV_CMP_COPY(stencil_reference.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE); + ANV_CMP_COPY(stencil_reference.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) - dest->stencil_reference = src->stencil_reference; + ANV_CMP_COPY(line_stipple.factor, ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE); + ANV_CMP_COPY(line_stipple.pattern, ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE); + +#undef ANV_CMP_COPY + + return changed; } static void -anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *state = &cmd_buffer->state; - memset(&state->descriptors, 0, sizeof(state->descriptors)); - memset(&state->push_constants, 0, sizeof(state->push_constants)); - memset(state->binding_tables, 0, sizeof(state->binding_tables)); - memset(state->samplers, 0, sizeof(state->samplers)); - - /* 0 isn't a valid config. This ensures that we always configure L3$. */ - cmd_buffer->state.current_l3_config = 0; - - state->dirty = 0; - state->vb_dirty = 0; - state->pending_pipe_bits = 0; - state->descriptors_dirty = 0; - state->push_constants_dirty = 0; - state->pipeline = NULL; - state->push_constant_stages = 0; - state->restart_index = UINT32_MAX; - state->dynamic = default_dynamic_state; - state->need_query_wa = true; - state->pma_fix_enabled = false; - state->hiz_enabled = false; - - if (state->attachments != NULL) { - vk_free(&cmd_buffer->pool->alloc, state->attachments); - state->attachments = NULL; - } + memset(state, 0, sizeof(*state)); - state->gen7.index_buffer = NULL; + state->current_pipeline = UINT32_MAX; + state->restart_index = UINT32_MAX; + state->gfx.dynamic = default_dynamic_state; } -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else if ((*ptr)->size < size) { - *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); +static void +anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer, + struct anv_cmd_pipeline_state *pipe_state) +{ + for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) { + if (pipe_state->push_descriptors[i]) { + anv_descriptor_set_layout_unref(cmd_buffer->device, + pipe_state->push_descriptors[i]->set.layout); + vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + } } - (*ptr)->size = size; +} - return VK_SUCCESS; +static void +anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + + anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base); + anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base); + + vk_free(&cmd_buffer->pool->alloc, state->attachments); +} + +static void +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +{ + anv_cmd_state_finish(cmd_buffer); + anv_cmd_state_init(cmd_buffer); } static VkResult anv_create_cmd_buffer( @@ -182,20 +203,25 @@ static VkResult anv_create_cmd_buffer( if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + vk_object_base_init(&device->vk, &cmd_buffer->base, 
+ VK_OBJECT_TYPE_COMMAND_BUFFER); + + cmd_buffer->batch.status = VK_SUCCESS; + cmd_buffer->device = device; cmd_buffer->pool = pool; cmd_buffer->level = level; - cmd_buffer->state.attachments = NULL; result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) goto fail; anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); + &device->surface_state_pool, 4096); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); + &device->dynamic_state_pool, 16384); + + anv_cmd_state_init(cmd_buffer); if (pool) { list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); @@ -254,7 +280,9 @@ anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + anv_cmd_state_finish(cmd_buffer); + + vk_object_base_finish(&cmd_buffer->base); vk_free(&cmd_buffer->pool->alloc, cmd_buffer); } @@ -278,17 +306,17 @@ VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer) { cmd_buffer->usage_flags = 0; - cmd_buffer->state.current_pipeline = UINT32_MAX; + cmd_buffer->perf_query_pool = NULL; anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_reset(cmd_buffer); anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_init(&cmd_buffer->surface_state_stream, - &cmd_buffer->device->surface_state_block_pool); + &cmd_buffer->device->surface_state_pool, 4096); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &cmd_buffer->device->dynamic_state_block_pool); + &cmd_buffer->device->dynamic_state_pool, 16384); return VK_SUCCESS; } @@ -300,22 +328,91 @@ VkResult anv_ResetCommandBuffer( return anv_cmd_buffer_reset(cmd_buffer); } +#define anv_genX_call(devinfo, func, ...) 
\ + switch ((devinfo)->gen) { \ + case 7: \ + if ((devinfo)->is_haswell) { \ + gen75_##func(__VA_ARGS__); \ + } else { \ + gen7_##func(__VA_ARGS__); \ + } \ + break; \ + case 8: \ + gen8_##func(__VA_ARGS__); \ + break; \ + case 9: \ + gen9_##func(__VA_ARGS__); \ + break; \ + case 10: \ + gen10_##func(__VA_ARGS__); \ + break; \ + case 11: \ + gen11_##func(__VA_ARGS__); \ + break; \ + case 12: \ + gen12_##func(__VA_ARGS__); \ + break; \ + default: \ + assert(!"Unknown hardware generation"); \ + } + void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - switch (cmd_buffer->device->info.gen) { - case 7: - if (cmd_buffer->device->info.is_haswell) - return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); - else - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - case 8: - return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); - case 9: - return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); - default: - unreachable("unsupported gen\n"); - } + anv_genX_call(&cmd_buffer->device->info, + cmd_buffer_emit_state_base_address, + cmd_buffer); +} + +void +anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + enum isl_aux_usage aux_usage, + uint32_t level, + uint32_t base_layer, + uint32_t layer_count) +{ + anv_genX_call(&cmd_buffer->device->info, + cmd_buffer_mark_image_written, + cmd_buffer, image, aspect, aux_usage, + level, base_layer, layer_count); +} + +void +anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer) +{ + anv_genX_call(&cmd_buffer->device->info, + cmd_emit_conditional_render_predicate, + cmd_buffer); +} + +static bool +mem_update(void *dst, const void *src, size_t size) +{ + if (memcmp(dst, src, size) == 0) + return false; + + memcpy(dst, src, size); + return true; +} + +static void +set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + const struct anv_pipeline_bind_map *map) +{ + if (mem_update(cmd_buffer->state.surface_sha1s[stage], + map->surface_sha1, sizeof(map->surface_sha1))) + cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage); + + if (mem_update(cmd_buffer->state.sampler_sha1s[stage], + map->sampler_sha1, sizeof(map->sampler_sha1))) + cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage); + + if (mem_update(cmd_buffer->state.push_sha1s[stage], + map->push_sha1, sizeof(map->push_sha1))) + cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage); } void anv_CmdBindPipeline( @@ -327,26 +424,41 @@ void anv_CmdBindPipeline( ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct anv_compute_pipeline *compute_pipeline = + anv_pipeline_to_compute(pipeline); + if (cmd_buffer->state.compute.pipeline == compute_pipeline) + return; + + cmd_buffer->state.compute.pipeline = compute_pipeline; + cmd_buffer->state.compute.pipeline_dirty = true; + set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, + &compute_pipeline->cs->bind_map); break; + } - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.pipeline = pipeline; - cmd_buffer->state.vb_dirty |= pipeline->vb_used; - 
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; - cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; + case VK_PIPELINE_BIND_POINT_GRAPHICS: { + struct anv_graphics_pipeline *gfx_pipeline = + anv_pipeline_to_graphics(pipeline); + if (cmd_buffer->state.gfx.pipeline == gfx_pipeline) + return; + + cmd_buffer->state.gfx.pipeline = gfx_pipeline; + cmd_buffer->state.gfx.vb_dirty |= gfx_pipeline->vb_used; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; + + anv_foreach_stage(stage, gfx_pipeline->active_stages) { + set_dirty_for_bind_map(cmd_buffer, stage, + &gfx_pipeline->shaders[stage]->bind_map); + } /* Apply the dynamic state from the pipeline */ - cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; - anv_dynamic_state_copy(&cmd_buffer->state.dynamic, - &pipeline->dynamic_state, - pipeline->dynamic_state_mask); + cmd_buffer->state.gfx.dirty |= + anv_dynamic_state_copy(&cmd_buffer->state.gfx.dynamic, + &gfx_pipeline->dynamic_state, + gfx_pipeline->dynamic_state_mask); break; + } default: assert(!"invalid bind point"); @@ -363,13 +475,13 @@ void anv_CmdSetViewport( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstViewport + viewportCount; - if (cmd_buffer->state.dynamic.viewport.count < total_count) - cmd_buffer->state.dynamic.viewport.count = total_count; + if (cmd_buffer->state.gfx.dynamic.viewport.count < total_count) + cmd_buffer->state.gfx.dynamic.viewport.count = total_count; - memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, + memcpy(cmd_buffer->state.gfx.dynamic.viewport.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; } void anv_CmdSetScissor( @@ -381,13 +493,13 @@ void anv_CmdSetScissor( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstScissor + scissorCount; - if (cmd_buffer->state.dynamic.scissor.count < total_count) - cmd_buffer->state.dynamic.scissor.count = total_count; + if (cmd_buffer->state.gfx.dynamic.scissor.count < total_count) + cmd_buffer->state.gfx.dynamic.scissor.count = total_count; - memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, + memcpy(cmd_buffer->state.gfx.dynamic.scissor.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; } void anv_CmdSetLineWidth( @@ -396,8 +508,8 @@ void anv_CmdSetLineWidth( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.line_width = lineWidth; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; + cmd_buffer->state.gfx.dynamic.line_width = lineWidth; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } void anv_CmdSetDepthBias( @@ -408,11 +520,11 @@ void anv_CmdSetDepthBias( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; - cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + cmd_buffer->state.gfx.dynamic.depth_bias.bias = depthBiasConstantFactor; + cmd_buffer->state.gfx.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.gfx.dynamic.depth_bias.slope = depthBiasSlopeFactor; - 
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } void anv_CmdSetBlendConstants( @@ -421,10 +533,10 @@ void anv_CmdSetBlendConstants( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - memcpy(cmd_buffer->state.dynamic.blend_constants, + memcpy(cmd_buffer->state.gfx.dynamic.blend_constants, blendConstants, sizeof(float) * 4); - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } void anv_CmdSetDepthBounds( @@ -434,10 +546,10 @@ void anv_CmdSetDepthBounds( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; - cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + cmd_buffer->state.gfx.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.gfx.dynamic.depth_bounds.max = maxDepthBounds; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; } void anv_CmdSetStencilCompareMask( @@ -448,11 +560,11 @@ void anv_CmdSetStencilCompareMask( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + cmd_buffer->state.gfx.dynamic.stencil_compare_mask.front = compareMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + cmd_buffer->state.gfx.dynamic.stencil_compare_mask.back = compareMask; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } void anv_CmdSetStencilWriteMask( @@ -463,11 +575,11 @@ void anv_CmdSetStencilWriteMask( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + cmd_buffer->state.gfx.dynamic.stencil_write_mask.front = writeMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + cmd_buffer->state.gfx.dynamic.stencil_write_mask.back = writeMask; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } void anv_CmdSetStencilReference( @@ -478,11 +590,103 @@ void anv_CmdSetStencilReference( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_reference.front = reference; + cmd_buffer->state.gfx.dynamic.stencil_reference.front = reference; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_reference.back = reference; + cmd_buffer->state.gfx.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +void anv_CmdSetLineStippleEXT( + VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.gfx.dynamic.line_stipple.factor = lineStippleFactor; + cmd_buffer->state.gfx.dynamic.line_stipple.pattern = lineStipplePattern; + + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; +} + +static void +anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer 
*cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_pipeline_layout *layout, + uint32_t set_index, + struct anv_descriptor_set *set, + uint32_t *dynamic_offset_count, + const uint32_t **dynamic_offsets) +{ + struct anv_descriptor_set_layout *set_layout = + layout->set[set_index].layout; + + VkShaderStageFlags stages = set_layout->shader_stages; + struct anv_cmd_pipeline_state *pipe_state; + + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + stages &= VK_SHADER_STAGE_ALL_GRAPHICS; + pipe_state = &cmd_buffer->state.gfx.base; + break; + + case VK_PIPELINE_BIND_POINT_COMPUTE: + stages &= VK_SHADER_STAGE_COMPUTE_BIT; + pipe_state = &cmd_buffer->state.compute.base; + break; + + default: + unreachable("invalid bind point"); + } + + VkShaderStageFlags dirty_stages = 0; + if (pipe_state->descriptors[set_index] != set) { + pipe_state->descriptors[set_index] = set; + dirty_stages |= stages; + } + + /* If it's a push descriptor set, we have to flag things as dirty + * regardless of whether or not the CPU-side data structure changed as we + * may have edited in-place. + */ + if (set->pool == NULL) + dirty_stages |= stages; + + if (dynamic_offsets) { + if (set_layout->dynamic_offset_count > 0) { + uint32_t dynamic_offset_start = + layout->set[set_index].dynamic_offset_start; + + anv_foreach_stage(stage, stages) { + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + uint32_t *push_offsets = + &push->dynamic_offsets[dynamic_offset_start]; + + /* Assert that everything is in range */ + assert(set_layout->dynamic_offset_count <= *dynamic_offset_count); + assert(dynamic_offset_start + set_layout->dynamic_offset_count <= + ARRAY_SIZE(push->dynamic_offsets)); + + unsigned mask = set_layout->stage_dynamic_offsets[stage]; + STATIC_ASSERT(MAX_DYNAMIC_BUFFERS <= sizeof(mask) * 8); + while (mask) { + int i = u_bit_scan(&mask); + if (push_offsets[i] != (*dynamic_offsets)[i]) { + push_offsets[i] = (*dynamic_offsets)[i]; + dirty_stages |= mesa_to_vk_shader_stage(stage); + } + } + } + + *dynamic_offsets += set_layout->dynamic_offset_count; + *dynamic_offset_count -= set_layout->dynamic_offset_count; + } + } + + cmd_buffer->state.descriptors_dirty |= dirty_stages; + cmd_buffer->state.push_constants_dirty |= dirty_stages; } void anv_CmdBindDescriptorSets( @@ -497,46 +701,15 @@ void anv_CmdBindDescriptorSets( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - assert(firstSet + descriptorSetCount < MAX_SETS); + assert(firstSet + descriptorSetCount <= MAX_SETS); for (uint32_t i = 0; i < descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = layout->set[firstSet + i].layout; - - if (cmd_buffer->state.descriptors[firstSet + i] != set) { - cmd_buffer->state.descriptors[firstSet + i] = set; - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - } - - if (set_layout->dynamic_offset_count > 0) { - anv_foreach_stage(s, set_layout->shader_stages) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); - - struct anv_push_constants *push = - cmd_buffer->state.push_constants[s]; - - unsigned d = layout->set[firstSet + i].dynamic_offset_start; - const uint32_t *offsets = pDynamicOffsets; - struct anv_descriptor *desc = set->descriptors; - - for (unsigned b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index < 0) - continue; - - unsigned array_size = 
set_layout->binding[b].array_size; - for (unsigned j = 0; j < array_size; j++) { - push->dynamic[d].offset = *(offsets++); - push->dynamic[d].range = (desc->buffer_view) ? - desc->buffer_view->range : 0; - desc++; - d++; - } - } - } - cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - } + anv_cmd_buffer_bind_descriptor_set(cmd_buffer, pipelineBindPoint, + layout, firstSet + i, set, + &dynamicOffsetCount, + &pDynamicOffsets); } } @@ -553,11 +726,40 @@ void anv_CmdBindVertexBuffers( /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. */ - assert(firstBinding + bindingCount < MAX_VBS); + assert(firstBinding + bindingCount <= MAX_VBS); for (uint32_t i = 0; i < bindingCount; i++) { vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); vb[firstBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); + cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i); + } +} + +void anv_CmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + if (pBuffers[i] == VK_NULL_HANDLE) { + xfb[firstBinding + i].buffer = NULL; + } else { + ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]); + xfb[firstBinding + i].buffer = buffer; + xfb[firstBinding + i].offset = pOffsets[i]; + xfb[firstBinding + i].size = + anv_buffer_get_range(buffer, pOffsets[i], + pSizes ? pSizes[i] : VK_WHOLE_SIZE); + } } } @@ -587,8 +789,6 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); memcpy(state.map, data, size); - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; @@ -608,8 +808,6 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; @@ -619,32 +817,14 @@ struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { - /* If we don't have this stage, bail. */ - if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage)) - return (struct anv_state) { .offset = 0 }; - struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; - const struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->shaders[stage]->prog_data; - - /* If we don't actually have any push constants, bail. 
*/ - if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) - return (struct anv_state) { .offset = 0 }; + &cmd_buffer->state.push_constants[stage]; struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - prog_data->nr_params * sizeof(float), + sizeof(struct anv_push_constants), 32 /* bottom 5 bits MBZ */); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); - } - - anv_state_flush(cmd_buffer->device, state); + memcpy(state.map, data, sizeof(struct anv_push_constants)); return state; } @@ -653,58 +833,48 @@ struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) { struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0]; - /* If we don't actually have any push constants, bail. */ - if (cs_prog_data->push.total.size == 0) + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); + const unsigned total_push_constants_size = + brw_cs_push_const_total_size(cs_prog_data, cs_params.threads); + if (total_push_constants_size == 0) return (struct anv_state) { .offset = 0 }; const unsigned push_constant_alignment = cmd_buffer->device->info.gen < 8 ? 
32 : 64; const unsigned aligned_total_push_constants_size = - ALIGN(cs_prog_data->push.total.size, push_constant_alignment); + ALIGN(total_push_constants_size, push_constant_alignment); struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, aligned_total_push_constants_size, push_constant_alignment); - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; + void *dst = state.map; + const void *src = (char *)data + (range->start * 32); if (cs_prog_data->push.cross_thread.size > 0) { - assert(cs_prog_data->thread_local_id_index < 0 || - cs_prog_data->thread_local_id_index >= - cs_prog_data->push.cross_thread.dwords); - for (unsigned i = 0; - i < cs_prog_data->push.cross_thread.dwords; - i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); - } + memcpy(dst, src, cs_prog_data->push.cross_thread.size); + dst += cs_prog_data->push.cross_thread.size; + src += cs_prog_data->push.cross_thread.size; } if (cs_prog_data->push.per_thread.size > 0) { - for (unsigned t = 0; t < cs_prog_data->threads; t++) { - unsigned dst = - 8 * (cs_prog_data->push.per_thread.regs * t + - cs_prog_data->push.cross_thread.regs); - unsigned src = cs_prog_data->push.cross_thread.dwords; - for ( ; src < prog_data->nr_params; src++, dst++) { - if (src != cs_prog_data->thread_local_id_index) { - uint32_t offset = (uintptr_t)prog_data->param[src]; - u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset); - } else { - u32_map[dst] = t * cs_prog_data->simd_size; - } - } + for (unsigned t = 0; t < cs_params.threads; t++) { + memcpy(dst, src, cs_prog_data->push.per_thread.size); + + uint32_t *subgroup_id = dst + + offsetof(struct anv_push_constants, cs.subgroup_id) - + (range->start * 32 + cs_prog_data->push.cross_thread.size); + *subgroup_id = t; + + dst += cs_prog_data->push.per_thread.size; } } - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -719,9 +889,7 @@ void anv_CmdPushConstants( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_foreach_stage(stage, stageFlags) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - - memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + memcpy(cmd_buffer->state.push_constants[stage].client_data + offset, pValues, size); } @@ -737,15 +905,17 @@ VkResult anv_CreateCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; - pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_COMMAND_POOL); + if (pAllocator) pool->alloc = *pAllocator; else - pool->alloc = device->alloc; + pool->alloc = device->vk.alloc; list_inithead(&pool->cmd_buffers); @@ -770,7 +940,8 @@ void anv_DestroyCommandPool( anv_cmd_buffer_destroy(cmd_buffer); } - vk_free2(&device->alloc, pAllocator, pool); + vk_object_base_finish(&pool->base); + vk_free2(&device->vk.alloc, pAllocator, pool); } VkResult anv_ResetCommandPool( @@ -788,10 +959,10 @@ VkResult anv_ResetCommandPool( return VK_SUCCESS; } -void anv_TrimCommandPoolKHR( +void anv_TrimCommandPool( VkDevice device, VkCommandPool commandPool, - VkCommandPoolTrimFlagsKHR flags) + VkCommandPoolTrimFlags flags) { /* Nothing for us to do here. Our pools stay pretty tidy. 
*/ } @@ -803,16 +974,207 @@ const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + if (subpass->depth_stencil_attachment == NULL) return NULL; const struct anv_image_view *iview = - fb->attachments[subpass->depth_stencil_attachment]; + cmd_buffer->state.attachments[subpass->depth_stencil_attachment->attachment].image_view; assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); return iview; } + +static struct anv_descriptor_set * +anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_descriptor_set_layout *layout, + uint32_t _set) +{ + struct anv_cmd_pipeline_state *pipe_state; + if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + pipe_state = &cmd_buffer->state.compute.base; + } else { + assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + pipe_state = &cmd_buffer->state.gfx.base; + } + + struct anv_push_descriptor_set **push_set = + &pipe_state->push_descriptors[_set]; + + if (*push_set == NULL) { + *push_set = vk_zalloc(&cmd_buffer->pool->alloc, + sizeof(struct anv_push_descriptor_set), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*push_set == NULL) { + anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + } + + struct anv_descriptor_set *set = &(*push_set)->set; + + if (set->layout != layout) { + if (set->layout) + anv_descriptor_set_layout_unref(cmd_buffer->device, set->layout); + anv_descriptor_set_layout_ref(layout); + set->layout = layout; + } + set->size = anv_descriptor_set_layout_size(layout); + set->buffer_view_count = layout->buffer_view_count; + set->buffer_views = (*push_set)->buffer_views; + + if (layout->descriptor_buffer_size && + ((*push_set)->set_used_on_gpu || + set->desc_mem.alloc_size < layout->descriptor_buffer_size)) { + /* The previous buffer is either actively used by some GPU command (so + * we can't modify it) or is too small. Allocate a new one. + */ + struct anv_state desc_mem = + anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + layout->descriptor_buffer_size, 32); + if (set->desc_mem.alloc_size) { + /* TODO: Do we really need to copy all the time? 
*/ + memcpy(desc_mem.map, set->desc_mem.map, + MIN2(desc_mem.alloc_size, set->desc_mem.alloc_size)); + } + set->desc_mem = desc_mem; + + struct anv_address addr = { + .bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo, + .offset = set->desc_mem.offset, + }; + + const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; + set->desc_surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + isl_dev->ss.size, isl_dev->ss.align); + anv_fill_buffer_surface_state(cmd_buffer->device, + set->desc_surface_state, + ISL_FORMAT_R32G32B32A32_FLOAT, + addr, layout->descriptor_buffer_size, 1); + } + + return set; +} + +void anv_CmdPushDescriptorSetKHR( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t _set, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + + assert(_set < MAX_SETS); + + struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; + + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, pipelineBindPoint, + set_layout, _set); + if (!set) + return; + + /* Go through the user supplied descriptors. */ + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + anv_descriptor_set_write_image_view(cmd_buffer->device, set, + write->pImageInfo + j, + write->descriptorType, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + anv_descriptor_set_write_buffer_view(cmd_buffer->device, set, + write->descriptorType, + bview, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + + anv_descriptor_set_write_buffer(cmd_buffer->device, set, + &cmd_buffer->surface_state_stream, + write->descriptorType, + buffer, + write->dstBinding, + write->dstArrayElement + j, + write->pBufferInfo[j].offset, + write->pBufferInfo[j].range); + } + break; + + default: + break; + } + } + + anv_cmd_buffer_bind_descriptor_set(cmd_buffer, pipelineBindPoint, + layout, _set, set, NULL, NULL); +} + +void anv_CmdPushDescriptorSetWithTemplateKHR( + VkCommandBuffer commandBuffer, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + VkPipelineLayout _layout, + uint32_t _set, + const void* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_descriptor_update_template, template, + descriptorUpdateTemplate); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + + assert(_set < MAX_PUSH_DESCRIPTORS); + + struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; + 
+ struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, template->bind_point, + set_layout, _set); + if (!set) + return; + + anv_descriptor_set_write_template(cmd_buffer->device, set, + &cmd_buffer->surface_state_stream, + template, + pData); + + anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point, + layout, _set, set, NULL, NULL); +} + +void anv_CmdSetDeviceMask( + VkCommandBuffer commandBuffer, + uint32_t deviceMask) +{ + /* No-op */ +}
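
For reference, the reworked anv_dynamic_state_copy() in this patch no longer returns void: it reports a mask of the dynamic states that actually changed, which anv_CmdBindPipeline() ORs straight into cmd_buffer->state.gfx.dirty. The standalone sketch below is not part of the patch; all struct, macro, and flag names (example_dynamic_state, EXAMPLE_CMP_COPY, EXAMPLE_DIRTY_*) are simplified placeholders. It only illustrates the compare-then-copy pattern that the ANV_CMP_COPY macro above implements, so that redundant state re-emission is avoided when a bound pipeline carries the same dynamic values.

/* Standalone illustration of the change-detecting copy pattern used by
 * ANV_CMP_COPY in the patch above.  Types and flag values are invented
 * for the example and do not exist in the driver. */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t dirty_mask_t;

#define EXAMPLE_DIRTY_LINE_WIDTH  (1u << 0)
#define EXAMPLE_DIRTY_DEPTH_BIAS  (1u << 1)

struct example_dynamic_state {
   float line_width;
   struct { float bias, clamp, slope; } depth_bias;
};

/* Copy one field only if it differs, and record the corresponding dirty bit.
 * Relies on dest/src/copy_mask/changed from the enclosing scope, just like
 * ANV_CMP_COPY does. */
#define EXAMPLE_CMP_COPY(field, flag)        \
   if (copy_mask & (flag)) {                 \
      if (dest->field != src->field) {       \
         dest->field = src->field;           \
         changed |= (flag);                  \
      }                                      \
   }

static dirty_mask_t
example_dynamic_state_copy(struct example_dynamic_state *dest,
                           const struct example_dynamic_state *src,
                           dirty_mask_t copy_mask)
{
   dirty_mask_t changed = 0;

   EXAMPLE_CMP_COPY(line_width,       EXAMPLE_DIRTY_LINE_WIDTH);
   EXAMPLE_CMP_COPY(depth_bias.bias,  EXAMPLE_DIRTY_DEPTH_BIAS);
   EXAMPLE_CMP_COPY(depth_bias.clamp, EXAMPLE_DIRTY_DEPTH_BIAS);
   EXAMPLE_CMP_COPY(depth_bias.slope, EXAMPLE_DIRTY_DEPTH_BIAS);

   return changed;
}

int main(void)
{
   struct example_dynamic_state cur  = { .line_width = 1.0f };
   struct example_dynamic_state next = { .line_width = 2.0f };

   /* Only the line-width bit comes back set, since depth bias is identical
    * in both states; the caller would OR this into its dirty mask. */
   dirty_mask_t dirty = example_dynamic_state_copy(&cur, &next, ~0u);
   printf("dirty mask: 0x%x\n", dirty);
   return 0;
}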