X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_cmd_buffer.c;h=1ca33f206aa82f41cbb877af7bf976f98d8f3362;hb=2b676b2ce87520042ffb1fae8a9c0d97ba0e3cbc;hp=33687920a38e5ac1372d1492fba66f745439f330;hpb=af8535d02f9ca896292501cb4bb0e658c5229007;p=mesa.git diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 33687920a38..1ca33f206aa 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -30,6 +30,7 @@ #include "anv_private.h" #include "vk_format_info.h" +#include "vk_util.h" /** \file anv_cmd_buffer.c * @@ -71,45 +72,78 @@ const struct anv_dynamic_state default_dynamic_state = { .front = 0u, .back = 0u, }, + .line_stipple = { + .factor = 0u, + .pattern = 0u, + }, }; -void +/** + * Copy the dynamic state from src to dest based on the copy_mask. + * + * Avoid copying states that have not changed, except for VIEWPORT, SCISSOR and + * BLEND_CONSTANTS (always copy them if they are in the copy_mask). + * + * Returns a mask of the states which changed. + */ +anv_cmd_dirty_mask_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, const struct anv_dynamic_state *src, - uint32_t copy_mask) + anv_cmd_dirty_mask_t copy_mask) { - if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + anv_cmd_dirty_mask_t changed = 0; + + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { dest->viewport.count = src->viewport.count; typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); + changed |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; } - if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) { dest->scissor.count = src->scissor.count; typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); + changed |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; + } + + if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + changed |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } - if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) - dest->line_width = src->line_width; +#define ANV_CMP_COPY(field, flag) \ + if (copy_mask & flag) { \ + if (dest->field != src->field) { \ + dest->field = src->field; \ + changed |= flag; \ + } \ + } - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) - dest->depth_bias = src->depth_bias; + ANV_CMP_COPY(line_width, ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH); - if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) - typed_memcpy(dest->blend_constants, src->blend_constants, 4); + ANV_CMP_COPY(depth_bias.bias, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + ANV_CMP_COPY(depth_bias.clamp, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + ANV_CMP_COPY(depth_bias.slope, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS); + + ANV_CMP_COPY(depth_bounds.min, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS); + ANV_CMP_COPY(depth_bounds.max, ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS); + + ANV_CMP_COPY(stencil_compare_mask.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK); + ANV_CMP_COPY(stencil_compare_mask.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK); - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) - dest->depth_bounds = src->depth_bounds; + ANV_CMP_COPY(stencil_write_mask.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK); + ANV_CMP_COPY(stencil_write_mask.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) - dest->stencil_compare_mask = src->stencil_compare_mask; + ANV_CMP_COPY(stencil_reference.front, ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE); + ANV_CMP_COPY(stencil_reference.back, ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) - dest->stencil_write_mask = src->stencil_write_mask; + ANV_CMP_COPY(line_stipple.factor, ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE); + ANV_CMP_COPY(line_stipple.pattern, ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE); - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) - dest->stencil_reference = src->stencil_reference; +#undef ANV_CMP_COPY + + return changed; } static void @@ -128,8 +162,13 @@ static void anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer, struct anv_cmd_pipeline_state *pipe_state) { - for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) - vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) { + if (pipe_state->push_descriptors[i]) { + anv_descriptor_set_layout_unref(cmd_buffer->device, + pipe_state->push_descriptors[i]->set.layout); + vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + } + } } static void @@ -140,9 +179,6 @@ anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer) anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base); anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base); - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) - vk_free(&cmd_buffer->pool->alloc, state->push_constants[i]); - vk_free(&cmd_buffer->pool->alloc, state->attachments); } @@ -153,32 +189,6 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) anv_cmd_state_init(cmd_buffer); } -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - } else if ((*ptr)->size < size) { - *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - } - (*ptr)->size = size; - - return VK_SUCCESS; -} - static VkResult anv_create_cmd_buffer( struct anv_device * device, struct anv_cmd_pool * pool, @@ -193,9 +203,11 @@ static VkResult anv_create_cmd_buffer( if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &cmd_buffer->base, + VK_OBJECT_TYPE_COMMAND_BUFFER); + cmd_buffer->batch.status = VK_SUCCESS; - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; cmd_buffer->pool = pool; cmd_buffer->level = level; @@ -270,6 +282,7 @@ anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) anv_cmd_state_finish(cmd_buffer); + vk_object_base_finish(&cmd_buffer->base); vk_free(&cmd_buffer->pool->alloc, cmd_buffer); } @@ -293,6 +306,7 @@ VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer) { cmd_buffer->usage_flags = 0; + cmd_buffer->perf_query_pool = NULL; anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_reset(cmd_buffer); @@ -335,6 +349,9 @@ VkResult anv_ResetCommandBuffer( case 11: \ gen11_##func(__VA_ARGS__); \ break; \ + case 12: \ + gen12_##func(__VA_ARGS__); \ + break; \ default: \ assert(!"Unknown hardware generation"); \ } @@ -362,6 +379,42 @@ anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, level, base_layer, layer_count); } +void +anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer) +{ + anv_genX_call(&cmd_buffer->device->info, + cmd_emit_conditional_render_predicate, + cmd_buffer); +} + +static bool +mem_update(void *dst, const void *src, size_t size) +{ + if (memcmp(dst, src, size) == 0) + return false; + + memcpy(dst, src, size); + return true; +} + +static void +set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + const struct anv_pipeline_bind_map *map) +{ + if (mem_update(cmd_buffer->state.surface_sha1s[stage], + map->surface_sha1, sizeof(map->surface_sha1))) + cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage); + + if (mem_update(cmd_buffer->state.sampler_sha1s[stage], + map->sampler_sha1, sizeof(map->sampler_sha1))) + cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage); + + if (mem_update(cmd_buffer->state.push_sha1s[stage], + map->push_sha1, sizeof(map->push_sha1))) + cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage); +} + void anv_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -371,26 +424,41 @@ void anv_CmdBindPipeline( ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute.base.pipeline = pipeline; + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct anv_compute_pipeline *compute_pipeline = + anv_pipeline_to_compute(pipeline); + if (cmd_buffer->state.compute.pipeline == compute_pipeline) + return; + + cmd_buffer->state.compute.pipeline = compute_pipeline; cmd_buffer->state.compute.pipeline_dirty = true; - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, + &compute_pipeline->cs->bind_map); break; + } - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.gfx.base.pipeline = pipeline; - cmd_buffer->state.gfx.vb_dirty |= pipeline->vb_used; + case VK_PIPELINE_BIND_POINT_GRAPHICS: { + struct anv_graphics_pipeline *gfx_pipeline = + anv_pipeline_to_graphics(pipeline); + if (cmd_buffer->state.gfx.pipeline == gfx_pipeline) + return; + + cmd_buffer->state.gfx.pipeline = gfx_pipeline; + cmd_buffer->state.gfx.vb_dirty |= gfx_pipeline->vb_used; cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; - cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; + + anv_foreach_stage(stage, gfx_pipeline->active_stages) { + set_dirty_for_bind_map(cmd_buffer, stage, + &gfx_pipeline->shaders[stage]->bind_map); + } /* Apply the dynamic state from the pipeline */ - cmd_buffer->state.gfx.dirty |= pipeline->dynamic_state_mask; - anv_dynamic_state_copy(&cmd_buffer->state.gfx.dynamic, - &pipeline->dynamic_state, - pipeline->dynamic_state_mask); + cmd_buffer->state.gfx.dirty |= + anv_dynamic_state_copy(&cmd_buffer->state.gfx.dynamic, + &gfx_pipeline->dynamic_state, + gfx_pipeline->dynamic_state_mask); break; + } default: assert(!"invalid bind point"); @@ -529,6 +597,19 @@ void anv_CmdSetStencilReference( cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } +void anv_CmdSetLineStippleEXT( + VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.gfx.dynamic.line_stipple.factor = lineStippleFactor; + cmd_buffer->state.gfx.dynamic.line_stipple.pattern = lineStipplePattern; + + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; +} + static void anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, @@ -541,48 +622,71 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor_set_layout *set_layout = layout->set[set_index].layout; + VkShaderStageFlags stages = set_layout->shader_stages; struct anv_cmd_pipeline_state *pipe_state; - if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { - pipe_state = &cmd_buffer->state.compute.base; - } else { - assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + stages &= VK_SHADER_STAGE_ALL_GRAPHICS; pipe_state = &cmd_buffer->state.gfx.base; + break; + + case VK_PIPELINE_BIND_POINT_COMPUTE: + stages &= VK_SHADER_STAGE_COMPUTE_BIT; + pipe_state = &cmd_buffer->state.compute.base; + break; + + default: + unreachable("invalid bind point"); + } + + VkShaderStageFlags dirty_stages = 0; + if (pipe_state->descriptors[set_index] != set) { + pipe_state->descriptors[set_index] = set; + dirty_stages |= stages; } - pipe_state->descriptors[set_index] = set; + + /* If it's a push descriptor set, we have to flag things as dirty + * regardless of whether or not the CPU-side data structure changed as we + * may have edited in-place. + */ + if (set->pool == NULL) + dirty_stages |= stages; if (dynamic_offsets) { if (set_layout->dynamic_offset_count > 0) { uint32_t dynamic_offset_start = layout->set[set_index].dynamic_offset_start; - /* Assert that everything is in range */ - assert(set_layout->dynamic_offset_count <= *dynamic_offset_count); - assert(dynamic_offset_start + set_layout->dynamic_offset_count <= - ARRAY_SIZE(pipe_state->dynamic_offsets)); - - typed_memcpy(&pipe_state->dynamic_offsets[dynamic_offset_start], - *dynamic_offsets, set_layout->dynamic_offset_count); + anv_foreach_stage(stage, stages) { + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + uint32_t *push_offsets = + &push->dynamic_offsets[dynamic_offset_start]; + + /* Assert that everything is in range */ + assert(set_layout->dynamic_offset_count <= *dynamic_offset_count); + assert(dynamic_offset_start + set_layout->dynamic_offset_count <= + ARRAY_SIZE(push->dynamic_offsets)); + + unsigned mask = set_layout->stage_dynamic_offsets[stage]; + STATIC_ASSERT(MAX_DYNAMIC_BUFFERS <= sizeof(mask) * 8); + while (mask) { + int i = u_bit_scan(&mask); + if (push_offsets[i] != (*dynamic_offsets)[i]) { + push_offsets[i] = (*dynamic_offsets)[i]; + dirty_stages |= mesa_to_vk_shader_stage(stage); + } + } + } *dynamic_offsets += set_layout->dynamic_offset_count; *dynamic_offset_count -= set_layout->dynamic_offset_count; } } - if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - } else { - assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); - cmd_buffer->state.descriptors_dirty |= - set_layout->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS; - } - - /* Pipeline layout objects are required to live at least while any command - * buffers that use them are in recording state. We need to grab a reference - * to the pipeline layout being bound here so we can compute correct dynamic - * offsets for VK_DESCRIPTOR_TYPE_*_DYNAMIC in dynamic_offset_for_binding() - * when we record draw commands that come after this. - */ - pipe_state->layout = layout; + cmd_buffer->state.descriptors_dirty |= dirty_stages; + cmd_buffer->state.push_constants_dirty |= dirty_stages; } void anv_CmdBindDescriptorSets( @@ -598,7 +702,7 @@ void anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - assert(firstSet + descriptorSetCount < MAX_SETS); + assert(firstSet + descriptorSetCount <= MAX_SETS); for (uint32_t i = 0; i < descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); @@ -630,6 +734,35 @@ void anv_CmdBindVertexBuffers( } } +void anv_CmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + if (pBuffers[i] == VK_NULL_HANDLE) { + xfb[firstBinding + i].buffer = NULL; + } else { + ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]); + xfb[firstBinding + i].buffer = buffer; + xfb[firstBinding + i].offset = pOffsets[i]; + xfb[firstBinding + i].size = + anv_buffer_get_range(buffer, pOffsets[i], + pSizes ? pSizes[i] : VK_WHOLE_SIZE); + } + } +} + enum isl_format anv_isl_format_for_descriptor_type(VkDescriptorType type) { @@ -656,8 +789,6 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); memcpy(state.map, data, size); - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; @@ -677,69 +808,23 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; } -static uint32_t -anv_push_constant_value(struct anv_push_constants *data, uint32_t param) -{ - if (BRW_PARAM_IS_BUILTIN(param)) { - switch (param) { - case BRW_PARAM_BUILTIN_ZERO: - return 0; - case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X: - return data->base_work_group_id[0]; - case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y: - return data->base_work_group_id[1]; - case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z: - return data->base_work_group_id[2]; - default: - unreachable("Invalid param builtin"); - } - } else { - uint32_t offset = ANV_PARAM_PUSH_OFFSET(param); - assert(offset % sizeof(uint32_t) == 0); - if (offset < data->size) - return *(uint32_t *)((uint8_t *)data + offset); - else - return 0; - } -} - struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; - - /* If we don't have this stage, bail. */ - if (!anv_pipeline_has_stage(pipeline, stage)) - return (struct anv_state) { .offset = 0 }; - struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; - const struct brw_stage_prog_data *prog_data = - pipeline->shaders[stage]->prog_data; - - /* If we don't actually have any push constants, bail. */ - if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) - return (struct anv_state) { .offset = 0 }; + &cmd_buffer->state.push_constants[stage]; struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - prog_data->nr_params * sizeof(float), + sizeof(struct anv_push_constants), 32 /* bottom 5 bits MBZ */); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - for (unsigned i = 0; i < prog_data->nr_params; i++) - u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); - - anv_state_flush(cmd_buffer->device, state); + memcpy(state.map, data, sizeof(struct anv_push_constants)); return state; } @@ -748,55 +833,48 @@ struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) { struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0]; - /* If we don't actually have any push constants, bail. */ - if (cs_prog_data->push.total.size == 0) + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); + const unsigned total_push_constants_size = + brw_cs_push_const_total_size(cs_prog_data, cs_params.threads); + if (total_push_constants_size == 0) return (struct anv_state) { .offset = 0 }; const unsigned push_constant_alignment = cmd_buffer->device->info.gen < 8 ? 32 : 64; const unsigned aligned_total_push_constants_size = - ALIGN(cs_prog_data->push.total.size, push_constant_alignment); + ALIGN(total_push_constants_size, push_constant_alignment); struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, aligned_total_push_constants_size, push_constant_alignment); - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; + void *dst = state.map; + const void *src = (char *)data + (range->start * 32); if (cs_prog_data->push.cross_thread.size > 0) { - for (unsigned i = 0; - i < cs_prog_data->push.cross_thread.dwords; - i++) { - assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID); - u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); - } + memcpy(dst, src, cs_prog_data->push.cross_thread.size); + dst += cs_prog_data->push.cross_thread.size; + src += cs_prog_data->push.cross_thread.size; } if (cs_prog_data->push.per_thread.size > 0) { - for (unsigned t = 0; t < cs_prog_data->threads; t++) { - unsigned dst = - 8 * (cs_prog_data->push.per_thread.regs * t + - cs_prog_data->push.cross_thread.regs); - unsigned src = cs_prog_data->push.cross_thread.dwords; - for ( ; src < prog_data->nr_params; src++, dst++) { - if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) { - u32_map[dst] = t; - } else { - u32_map[dst] = - anv_push_constant_value(data, prog_data->param[src]); - } - } + for (unsigned t = 0; t < cs_params.threads; t++) { + memcpy(dst, src, cs_prog_data->push.per_thread.size); + + uint32_t *subgroup_id = dst + + offsetof(struct anv_push_constants, cs.subgroup_id) - + (range->start * 32 + cs_prog_data->push.cross_thread.size); + *subgroup_id = t; + + dst += cs_prog_data->push.per_thread.size; } } - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -811,13 +889,7 @@ void anv_CmdPushConstants( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_foreach_stage(stage, stageFlags) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, - stage, client_data); - if (result != VK_SUCCESS) - return; - - memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + memcpy(cmd_buffer->state.push_constants[stage].client_data + offset, pValues, size); } @@ -833,15 +905,17 @@ VkResult anv_CreateCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; - pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_COMMAND_POOL); + if (pAllocator) pool->alloc = *pAllocator; else - pool->alloc = device->alloc; + pool->alloc = device->vk.alloc; list_inithead(&pool->cmd_buffers); @@ -866,7 +940,8 @@ void anv_DestroyCommandPool( anv_cmd_buffer_destroy(cmd_buffer); } - vk_free2(&device->alloc, pAllocator, pool); + vk_object_base_finish(&pool->base); + vk_free2(&device->vk.alloc, pAllocator, pool); } VkResult anv_ResetCommandPool( @@ -899,13 +974,12 @@ const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED) + if (subpass->depth_stencil_attachment == NULL) return NULL; const struct anv_image_view *iview = - fb->attachments[subpass->depth_stencil_attachment.attachment]; + cmd_buffer->state.attachments[subpass->depth_stencil_attachment->attachment].image_view; assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); @@ -913,10 +987,11 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) return iview; } -static struct anv_push_descriptor_set * -anv_cmd_buffer_get_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point, - uint32_t set) +static struct anv_descriptor_set * +anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_descriptor_set_layout *layout, + uint32_t _set) { struct anv_cmd_pipeline_state *pipe_state; if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { @@ -927,19 +1002,62 @@ anv_cmd_buffer_get_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, } struct anv_push_descriptor_set **push_set = - &pipe_state->push_descriptors[set]; + &pipe_state->push_descriptors[_set]; if (*push_set == NULL) { - *push_set = vk_alloc(&cmd_buffer->pool->alloc, - sizeof(struct anv_push_descriptor_set), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + *push_set = vk_zalloc(&cmd_buffer->pool->alloc, + sizeof(struct anv_push_descriptor_set), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (*push_set == NULL) { anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return NULL; } } - return *push_set; + struct anv_descriptor_set *set = &(*push_set)->set; + + if (set->layout != layout) { + if (set->layout) + anv_descriptor_set_layout_unref(cmd_buffer->device, set->layout); + anv_descriptor_set_layout_ref(layout); + set->layout = layout; + } + set->size = anv_descriptor_set_layout_size(layout); + set->buffer_view_count = layout->buffer_view_count; + set->buffer_views = (*push_set)->buffer_views; + + if (layout->descriptor_buffer_size && + ((*push_set)->set_used_on_gpu || + set->desc_mem.alloc_size < layout->descriptor_buffer_size)) { + /* The previous buffer is either actively used by some GPU command (so + * we can't modify it) or is too small. Allocate a new one. + */ + struct anv_state desc_mem = + anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + layout->descriptor_buffer_size, 32); + if (set->desc_mem.alloc_size) { + /* TODO: Do we really need to copy all the time? */ + memcpy(desc_mem.map, set->desc_mem.map, + MIN2(desc_mem.alloc_size, set->desc_mem.alloc_size)); + } + set->desc_mem = desc_mem; + + struct anv_address addr = { + .bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo, + .offset = set->desc_mem.offset, + }; + + const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; + set->desc_surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + isl_dev->ss.size, isl_dev->ss.align); + anv_fill_buffer_surface_state(cmd_buffer->device, + set->desc_surface_state, + ISL_FORMAT_R32G32B32A32_FLOAT, + addr, layout->descriptor_buffer_size, 1); + } + + return set; } void anv_CmdPushDescriptorSetKHR( @@ -957,19 +1075,12 @@ void anv_CmdPushDescriptorSetKHR( struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - pipelineBindPoint, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, pipelineBindPoint, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - /* Go through the user supplied descriptors. */ for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; @@ -981,7 +1092,7 @@ void anv_CmdPushDescriptorSetKHR( case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: for (uint32_t j = 0; j < write->descriptorCount; j++) { - anv_descriptor_set_write_image_view(set, &cmd_buffer->device->info, + anv_descriptor_set_write_image_view(cmd_buffer->device, set, write->pImageInfo + j, write->descriptorType, write->dstBinding, @@ -995,7 +1106,7 @@ void anv_CmdPushDescriptorSetKHR( ANV_FROM_HANDLE(anv_buffer_view, bview, write->pTexelBufferView[j]); - anv_descriptor_set_write_buffer_view(set, + anv_descriptor_set_write_buffer_view(cmd_buffer->device, set, write->descriptorType, bview, write->dstBinding, @@ -1008,12 +1119,9 @@ void anv_CmdPushDescriptorSetKHR( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->descriptorCount; j++) { - assert(write->pBufferInfo[j].buffer); ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); - assert(buffer); - anv_descriptor_set_write_buffer(set, - cmd_buffer->device, + anv_descriptor_set_write_buffer(cmd_buffer->device, set, &cmd_buffer->surface_state_stream, write->descriptorType, buffer, @@ -1049,21 +1157,13 @@ void anv_CmdPushDescriptorSetWithTemplateKHR( struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - template->bind_point, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, template->bind_point, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - - anv_descriptor_set_write_template(set, - cmd_buffer->device, + anv_descriptor_set_write_template(cmd_buffer->device, set, &cmd_buffer->surface_state_stream, template, pData);